Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
cpdf_linkextract_unittest.cpp
Go to the documentation of this file.
1// Copyright 2015 The PDFium Authors
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "core/fpdftext/cpdf_linkextract.h"
6
7#include "testing/gtest/include/gtest/gtest.h"
8
9// Test-only subclass of CPDF_LinkExtract used by the test cases in this file.
10class CPDF_TestLinkExtract final : public CPDF_LinkExtract {
11 public:
13
14 private:
15 // Befriend the test cases so that they can call the protected member
16 // functions CheckMailLink() and CheckWebLink() directly.
17 FRIEND_TEST(CPDF_LinkExtractTest, CheckMailLink);
18 FRIEND_TEST(CPDF_LinkExtractTest, CheckWebLink);
19};
20
// NOTE(review): the TEST(...) header line for this body is not present in this
// listing; presumably TEST(CPDF_LinkExtractTest, CheckMailLink), matching the
// FRIEND_TEST declaration - confirm against the original file.
22 CPDF_TestLinkExtract extractor;
23 // Inputs for which CheckMailLink() must fail to extract a mail link.
24 const wchar_t* const kInvalidStrings[] = {
25 L"",
26 L"peter.pan", // '@' is required.
27 L"abc@server", // Domain name needs at least one '.'.
28 L"abc.@gmail.com", // '.' cannot immediately precede '@'.
29 L"abc@xyz&q.org", // Domain name should not contain '&'.
30 L"abc@.xyz.org", // Domain name should not start with '.'.
31 L"fan@g..com" // Domain name should not have consecutive '.'.
32 };
33 for (const wchar_t* input : kInvalidStrings) {
34 WideString text_str(input);
35 EXPECT_FALSE(extractor.CheckMailLink(&text_str)) << input;
36 }
37
38 // Inputs from which CheckMailLink() must extract a mail link.
39 // Each entry is {input_string, expected_extracted_email_address}.
40 const wchar_t* const kValidStrings[][2] = {
41 {L"peter@abc.d", L"peter@abc.d"},
42 {L"red.teddy.b@abc.com", L"red.teddy.b@abc.com"},
43 {L"abc_@gmail.com", L"abc_@gmail.com"}, // '_' is ok before '@'.
44 {L"dummy-hi@gmail.com",
45 L"dummy-hi@gmail.com"}, // '-' is ok in user name.
46 {L"a..df@gmail.com", L"df@gmail.com"}, // Stop at consecutive '.'.
47 {L".john@yahoo.com", L"john@yahoo.com"}, // Remove leading '.'.
48 {L"abc@xyz.org?/", L"abc@xyz.org"}, // Trim ending invalid chars.
49 {L"fan{abc@xyz.org", L"abc@xyz.org"}, // Trim beginning invalid chars.
50 {L"fan@g.com..", L"fan@g.com"}, // Trim the ending periods.
51 {L"CAP.cap@Gmail.Com", L"CAP.cap@Gmail.Com"}, // Keep the original case.
52 };
53 for (const auto& it : kValidStrings) {
54 const wchar_t* const input = it[0];
55 WideString text_str(input);
// On success, text_str is expected to have been rewritten in place to
// "mailto:" followed by the expected address.
56 WideString expected_str(L"mailto:");
57 expected_str += it[1];
58 EXPECT_TRUE(extractor.CheckMailLink(&text_str)) << input;
59 EXPECT_STREQ(expected_str.c_str(), text_str.c_str());
60 }
61}
62
// NOTE(review): the TEST(...) header line for this body is not present in this
// listing; presumably TEST(CPDF_LinkExtractTest, CheckWebLink), matching the
// FRIEND_TEST declaration - confirm against the original file.
64 CPDF_TestLinkExtract extractor;
65 // Inputs for which CheckWebLink() must not return a link.
66 // The last few are legitimate web addresses that are not handled for now.
67 const wchar_t* const kInvalidCases[] = {
68 L"", // Blank.
69 L"http", // No colon.
70 L"www.", // Missing domain.
71 L"https-and-www", // Dash not colon.
72 L"http:/abc.com", // Missing slash.
73 L"http://((()),", // Only invalid chars in host name.
74 L"ftp://example.com", // Ftp scheme is not supported.
75 L"http:example.com", // Missing slashes.
// NOTE(review): the three inputs below have no ':' after "http", so they may
// be rejected for that reason rather than because of the IPv6 part - confirm
// that this is the intended coverage.
76 L"http//[example.com", // Invalid IPv6 address.
77 L"http//[00:00:00:00:00:00", // Invalid IPv6 address.
78 L"http//[]", // Empty IPv6 address.
79 L"abc.example.com", // URL without scheme.
80 };
81 for (const wchar_t* input : kInvalidCases) {
82 auto maybe_link = extractor.CheckWebLink(input);
83 EXPECT_FALSE(maybe_link.has_value()) << input;
84 }
85
86 // Inputs from which CheckWebLink() must extract a web link.
87 // Each case is {input_string, url_extracted, start_offset, count}.
88 struct ValidCase {
89 const wchar_t* const input_string;
90 const wchar_t* const url_extracted;
91 const size_t start_offset;
92 const size_t count;
93 };
94 const ValidCase kValidCases[] = {
95 {L"http://www.example.com", L"http://www.example.com", 0,
96 22}, // standard URL.
97 {L"http://www.example.com:88", L"http://www.example.com:88", 0,
98 25}, // URL with port number.
99 {L"http://test@www.example.com", L"http://test@www.example.com", 0,
100 27}, // URL with username.
101 {L"http://test:test@example.com", L"http://test:test@example.com", 0,
102 28}, // URL with username and password.
103 {L"http://example", L"http://example", 0,
104 14}, // URL with short domain name.
105 {L"http////www.server", L"http://www.server", 8,
106 10}, // URL starts with "www.".
107 {L"http:/www.abc.com", L"http://www.abc.com", 6,
108 11}, // URL starts with "www.".
109 {L"www.a.b.c", L"http://www.a.b.c", 0, 9}, // URL starts with "www.".
110 {L"https://a.us", L"https://a.us", 0, 12}, // Secure http URL.
111 {L"https://www.t.us", L"https://www.t.us", 0, 16}, // Secure http URL.
112 {L"www.example-test.com", L"http://www.example-test.com", 0,
113 20}, // '-' in host is ok.
114 {L"www.example.com,", L"http://www.example.com", 0,
115 15}, // Trim ending invalid chars.
116 {L"www.example.com;(", L"http://www.example.com", 0,
117 15}, // Trim ending invalid chars.
118 {L"test:www.abc.com", L"http://www.abc.com", 5,
119 11}, // Trim chars before URL.
120 {L"(http://www.abc.com)", L"http://www.abc.com", 1,
121 18}, // Trim external brackets.
122 {L"0(http://www.abc.com)0", L"http://www.abc.com", 2,
123 18}, // Trim chars outside brackets as well.
124 {L"0(www.abc.com)0", L"http://www.abc.com", 2,
125 11}, // Links without http should also have brackets trimmed.
126 {L"http://www.abc.com)0", L"http://www.abc.com)0", 0,
127 20}, // Do not trim brackets that were not opened.
128 {L"{(<http://www.abc.com>)}", L"http://www.abc.com", 3,
129 18}, // Trim chars with multiple levels of brackets.
130 {L"[http://www.abc.com/z(1)]", L"http://www.abc.com/z(1)", 1,
131 23}, // Brackets opened inside the URL should not be trimmed.
132 {L"(http://www.abc.com/z(1))", L"http://www.abc.com/z(1)", 1,
133 23}, // Brackets opened inside the URL should not be trimmed.
134 {L"\"http://www.abc.com\"", L"http://www.abc.com", 1,
135 18}, // External quotes are trimmed as well.
136 {L"www.g.com..", L"http://www.g.com..", 0, 11}, // Leave ending periods.
137
138 // Web links can contain IP addresses too.
139 {L"http://192.168.0.1", L"http://192.168.0.1", 0, 18}, // IPv4 address.
140 {L"http://192.168.0.1:80", L"http://192.168.0.1:80", 0,
141 21}, // IPv4 address with port.
142 {L"http://[aa::00:bb::00:cc:00]", L"http://[aa::00:bb::00:cc:00]", 0,
143 28}, // IPv6 reference.
144 {L"http://[aa::00:bb::00:cc:00]:12", L"http://[aa::00:bb::00:cc:00]:12",
145 0, 31}, // IPv6 reference with port.
146 {L"http://[aa]:12", L"http://[aa]:12", 0,
147 14}, // IP address is not validated.
148 {L"http://[aa]:12abc", L"http://[aa]:12", 0,
149 14}, // Trim for IPv6 address.
150 {L"http://[aa]:", L"http://[aa]", 0, 11}, // Trim for IPv6 address.
151
152 // Path and query parts can be anything.
153 {L"www.abc.com/#%%^&&*(", L"http://www.abc.com/#%%^&&*(", 0, 20},
154 {L"www.a.com/#a=@?q=rr&r=y", L"http://www.a.com/#a=@?q=rr&r=y", 0, 23},
// In the next case "\5" and "\6" are octal escape sequences, i.e. one
// character each, which is why the expected count is 22.
155 {L"http://a.com/1/2/3/4\5\6", L"http://a.com/1/2/3/4\5\6", 0, 22},
156 {L"http://www.example.com/foo;bar", L"http://www.example.com/foo;bar", 0,
157 30},
158
159 // Invalid chars inside host name are ok as we don't validate them.
160 {L"http://ex[am]ple", L"http://ex[am]ple", 0, 16},
161 {L"http://:example.com", L"http://:example.com", 0, 19},
162 {L"http://((())/path?", L"http://((())/path?", 0, 18},
163 {L"http:////abc.server", L"http:////abc.server", 0, 19},
164
165 // Non-ASCII chars are not validated either.
166 {L"www.测试.net", L"http://www.测试.net", 0, 10},
167 {L"www.测试。net。", L"http://www.测试。net。", 0, 11},
168 {L"www.测试.net;", L"http://www.测试.net;", 0, 11},
169 };
170 for (const auto& it : kValidCases) {
171 auto maybe_link = extractor.CheckWebLink(it.input_string);
// ASSERT (rather than EXPECT) so that value() below is never reached when no
// link was returned.
172 ASSERT_TRUE(maybe_link.has_value()) << it.input_string;
173 EXPECT_STREQ(it.url_extracted, maybe_link.value().m_strUrl.c_str());
174 EXPECT_EQ(it.start_offset, maybe_link.value().m_Start) << it.input_string;
175 EXPECT_EQ(it.count, maybe_link.value().m_Count) << it.input_string;
176 }
177}
CPDF_LinkExtract(const CPDF_TextPage *pTextPage)
bool CheckMailLink(WideString *str)
WideString & operator+=(const wchar_t *str)
const wchar_t * c_str() const
Definition widestring.h:81
WideString(const wchar_t *ptr)
TEST(FXCRYPT, MD5GenerateEmtpyData)