7#include "xfa/fxfa/cxfa_textparser.h"
12#include "core/fxcrt/css/cfx_css.h"
13#include "core/fxcrt/css/cfx_csscomputedstyle.h"
14#include "core/fxcrt/css/cfx_cssdeclaration.h"
15#include "core/fxcrt/css/cfx_cssstyleselector.h"
16#include "core/fxcrt/css/cfx_cssstylesheet.h"
17#include "core/fxcrt/fx_codepage.h"
18#include "core/fxcrt/xml/cfx_xmlelement.h"
19#include "core/fxcrt/xml/cfx_xmlnode.h"
20#include "core/fxge/fx_font.h"
21#include "third_party/base/check.h"
22#include "third_party/base/notreached.h"
23#include "xfa/fgas/font/cfgas_fontmgr.h"
24#include "xfa/fgas/font/cfgas_gefont.h"
25#include "xfa/fxfa/cxfa_ffapp.h"
26#include "xfa/fxfa/cxfa_ffdoc.h"
27#include "xfa/fxfa/cxfa_fontmgr.h"
28#include "xfa/fxfa/cxfa_textprovider.h"
29#include "xfa/fxfa/cxfa_texttabstopscontext.h"
30#include "xfa/fxfa/parser/cxfa_font.h"
31#include "xfa/fxfa/parser/cxfa_measurement.h"
32#include "xfa/fxfa/parser/cxfa_para.h"
36enum class TabStopStatus {
46WideString GetLowerCaseElementAttributeOrDefault(
47 const CFX_XMLElement* pElement,
48 const WideString& wsName,
49 const WideString& wsDefaultValue) {
65 m_mapXMLNodeToParseContext.clear();
74 m_pSelector = std::make_unique<CFX_CSSStyleSelector>();
77 m_pSelector->SetDefaultFontSize(font ? font->GetFontSize() : 10.0f);
83 m_cssInitialized =
true;
84 auto uaSheet = LoadDefaultSheetStyle();
85 m_pSelector->SetUAStyleSheet(std::move(uaSheet));
86 m_pSelector->UpdateStyleIndex();
89std::unique_ptr<CFX_CSSStyleSheet>
CXFA_TextParser::LoadDefaultSheetStyle() {
90 static const char kStyle[] =
91 "html,body,ol,p,ul{display:block}"
92 "li{display:list-item}"
93 "ol,ul{padding-left:33px;margin:1.12em 0}"
94 "ol{list-style-type:decimal}"
95 "a{color:#0000ff;text-decoration:underline}"
96 "b{font-weight:bolder}"
97 "i{font-style:italic}"
98 "sup{vertical-align:+15em;font-size:.66em}"
99 "sub{vertical-align:-15em;font-size:.66em}";
102 if (!sheet->LoadBuffer(ws.AsStringView()))
111 auto pStyle = m_pSelector->CreateComputedStyle(
nullptr);
112 float fLineHeight = 0;
113 float fFontSize = 10;
119 pStyle->SetTextIndent(indent);
138 NOTREACHED_NORETURN();
140 pStyle->SetTextAlign(hAlign);
146 pStyle->SetMarginWidth(rtMarginWidth);
159 pStyle->SetLetterSpacing(letterSpacing);
168 pStyle->SetTextDecoration(dwDecoration);
170 pStyle->SetLineHeight(fLineHeight);
171 pStyle->SetFontSize(fFontSize);
176 const CFX_CSSComputedStyle* pParentStyle) {
177 auto pNewStyle = m_pSelector->CreateComputedStyle(pParentStyle);
187 pNewStyle->SetTextDecoration(dwDecoration);
188 pNewStyle->SetNumberVerticalAlign(fBaseLine);
192 pNewStyle->SetMarginWidth(*pRect);
198 RetainPtr<
const CFX_CSSComputedStyle> pParentStyle) {
199 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
200 if (it == m_mapXMLNodeToParseContext.end())
203 Context* pContext = it->second.get();
209 auto tagProvider = ParseTagInfo(pXMLNode);
210 if (tagProvider->m_bContent)
213 auto pStyle = CreateStyle(pParentStyle);
214 m_pSelector->ComputeStyle(pContext->GetDecls(),
215 tagProvider->GetAttribute(L"style"),
216 tagProvider->GetAttribute(L"align"), pStyle.Get());
222 if (!pXMLContainer || !pTextProvider || m_bParsed)
226 InitCSSData(pTextProvider);
227 auto pRootStyle = CreateRootStyle(pTextProvider);
228 ParseRichText(pXMLContainer, pRootStyle.Get());
232 const CFX_CSSComputedStyle* pParentStyle) {
236 auto tagProvider = ParseTagInfo(pXMLNode);
237 if (!tagProvider->m_bTagAvailable)
240 RetainPtr<CFX_CSSComputedStyle> pNewStyle;
241 if (!(tagProvider->GetTagName().EqualsASCII(
"body") &&
242 tagProvider->GetTagName().EqualsASCII(
"html"))) {
243 auto pTextContext =
std::make_unique<
Context>();
245 if (!tagProvider->m_bContent) {
247 m_pSelector->MatchDeclarations(tagProvider->GetTagName());
248 pNewStyle = CreateStyle(pParentStyle);
249 m_pSelector->ComputeStyle(declArray, tagProvider->GetAttribute(L"style"),
250 tagProvider->GetAttribute(L"align"),
253 if (!declArray.empty())
254 pTextContext->SetDecls(
std::move(declArray));
256 eDisplay = pNewStyle->GetDisplay();
258 pTextContext->SetDisplay(eDisplay);
259 m_mapXMLNodeToParseContext[pXMLNode] = std::move(pTextContext);
262 for (
CFX_XMLNode* pXMLChild = pXMLNode->GetFirstChild(); pXMLChild;
263 pXMLChild = pXMLChild->GetNextSibling()) {
264 ParseRichText(pXMLChild, pNewStyle.Get());
269 static const uint32_t s_XFATagName[] = {
284 return std::binary_search(
std::begin(s_XFATagName),
std::end(s_XFATagName),
285 FX_HashCode_GetLoweredW(wsName.AsStringView()));
289std::unique_ptr<CXFA_TextParser::TagProvider>
CXFA_TextParser::ParseTagInfo(
291 auto tagProvider =
std::make_unique<TagProvider>();
295 tagProvider->SetTagName(wsName);
299 tagProvider->SetAttribute(L"style", wsValue);
304 tagProvider->m_bTagAvailable =
true;
305 tagProvider->m_bContent =
true;
317 const CFX_CSSComputedStyle* pStyle)
const {
340 const CFX_CSSComputedStyle* pStyle)
const {
341 WideString wsFamily = L"Courier";
342 uint32_t dwStyle = 0;
353 absl::optional<WideString> last_family = pStyle->GetLastFontFamily();
354 if (last_family.has_value())
355 wsFamily = last_family.value();
369 const CFX_CSSComputedStyle* pStyle)
const {
378 const CFX_CSSComputedStyle* pStyle,
386 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
387 if (it != m_mapXMLNodeToParseContext.end()) {
388 Context* pContext = it->second.get();
389 if (pContext && pContext->GetParentStyle() &&
390 pContext->GetParentStyle()->GetCustomStyle(
391 L"xfa-font-horizontal-scale", &wsValue)) {
395 pXMLNode = pXMLNode->GetParent();
404 const CFX_CSSComputedStyle* pStyle)
const {
417 const CFX_CSSComputedStyle* pStyle)
const {
432 const CFX_CSSComputedStyle* pStyle)
const {
444 const CFX_CSSComputedStyle* pStyle)
const {
447 pStyle->GetTextDecoration();
455 const CFX_CSSComputedStyle* pStyle)
const {
464 const CFX_CSSComputedStyle* pStyle)
const {
477 const CFX_CSSComputedStyle* pStyle,
479 float fVerScale)
const {
480 float fLineHeight = 0;
491 if (fLineHeight < 0.1f)
492 fLineHeight = fFontSize;
494 fLineHeight =
std::min(fLineHeight, fFontSize);
495 }
else if (fLineHeight < 0.1f) {
498 fLineHeight *= fVerScale;
506 return absl::nullopt;
510 return absl::nullopt;
513 if (wsAttr.IsEmpty())
514 return absl::nullopt;
516 if (wsAttr
[0
] == L'#')
520 GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedType", L"som");
521 if (!ws.EqualsASCII(
"uri"))
522 return absl::nullopt;
524 ws
= GetLowerCaseElementAttributeOrDefault(pElement, L"xfa:embedMode",
526 if (!(ws.EqualsASCII(
"raw") || ws.EqualsASCII(
"formatted")))
527 return absl::nullopt;
529 return pTextProvider->GetEmbeddedObj(wsAttr);
534 auto it = m_mapXMLNodeToParseContext.find(pXMLNode);
535 return it != m_mapXMLNodeToParseContext.end() ? it->second.get() :
nullptr;
540 if (!pStyle || !pTabstopContext)
549 pdfium::span<
const wchar_t> spTabStops = wsValue.span();
553 TabStopStatus eStatus = TabStopStatus::None;
554 while (iCur < spTabStops.size()) {
555 wchar_t ch = spTabStops[iCur];
557 case TabStopStatus::None:
561 eStatus = TabStopStatus::Alignment;
565 case TabStopStatus::Alignment:
567 wsAlign = WideStringView(spTabStops.subspan(iLast, iCur - iLast));
568 eStatus = TabStopStatus::StartLeader;
570 while (iCur < spTabStops.size() && spTabStops[iCur] <=
' ')
577 case TabStopStatus::StartLeader:
579 eStatus = TabStopStatus::Location;
582 while (iCur < spTabStops.size()) {
583 ch = spTabStops[iCur];
587 }
else if (ch ==
')') {
593 while (iCur < spTabStops.size() && spTabStops[iCur] <=
' ')
597 eStatus = TabStopStatus::Location;
600 case TabStopStatus::Location:
602 uint32_t dwHashCode = FX_HashCode_GetLoweredW(wsAlign.AsStringView());
604 WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
608 eStatus = TabStopStatus::None;
618 uint32_t dwHashCode = FX_HashCode_GetLoweredW(wsAlign.AsStringView());
620 WideStringView(spTabStops.subspan(iLast, iCur - iLast)));
636 RetainPtr<
const CFX_CSSComputedStyle> style) {
637 m_pParentStyle = std::move(style);
641 std::vector<
const CFX_CSSDeclaration*>&& decl) {
642 decls_ = std::move(decl);
const CFX_XMLElement * ToXMLElement(const CFX_XMLNode *pNode)
CFX_CSSVerticalAlign GetVerticalAlign() const
CFX_CSSFontStyle GetFontStyle() const
uint16_t GetFontWeight() const
float GetLineHeight() const
const CFX_CSSRect * GetMarginWidth() const
float GetFontSize() const
bool GetCustomStyle(const WideString &wsName, WideString *pValue) const
float GetNumberVerticalAlign() const
CFX_CSSLength & Set(CFX_CSSLengthUnit eUnit, float fValue)
WideString GetLocalTagName() const
WideString GetAttribute(const WideString &name) const
virtual Type GetType() const =0
CXFA_FontMgr * GetXFAFontMgr() const
CXFA_FFApp * GetApp() const
RetainPtr< CFGAS_GEFont > GetFont(CXFA_FFDoc *hDoc, const WideString &wsFontFamily, uint32_t dwFontStyles)
float GetBaselineShift() const
float GetHorizontalScale()
float GetFontSize() const
XFA_AttributeValue GetUnderlinePeriod()
float ToUnit(XFA_Unit eUnit) const
XFA_AttributeValue GetHorizontalAlign()
XFA_AttributeValue GetVerticalAlign()
void SetDecls(std::vector< const CFX_CSSDeclaration * > &&decl)
void SetParentStyle(RetainPtr< const CFX_CSSComputedStyle > style)
int32_t GetUnderline(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
float GetFontSize(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
float GetTabInterval(const CFX_CSSComputedStyle *pStyle) const
bool GetTabstops(const CFX_CSSComputedStyle *pStyle, CXFA_TextTabstopsContext *pTabstopContext)
Context * GetParseContextFromMap(const CFX_XMLNode *pXMLNode)
XFA_AttributeValue GetUnderlinePeriod(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
XFA_AttributeValue GetVAlign(CXFA_TextProvider *pTextProvider) const
int32_t CountTabs(const CFX_CSSComputedStyle *pStyle) const
float GetBaseline(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
virtual ~CXFA_TextParser()
float GetLineHeight(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle, bool bFirst, float fVerScale) const
bool TagValidate(const WideString &str) const
int32_t GetHorScale(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle, const CFX_XMLNode *pXMLNode) const
RetainPtr< CFX_CSSComputedStyle > CreateRootStyle(CXFA_TextProvider *pTextProvider)
bool IsSpaceRun(const CFX_CSSComputedStyle *pStyle) const
RetainPtr< CFGAS_GEFont > GetFont(CXFA_FFDoc *doc, CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
int32_t GetLinethrough(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
void DoParse(const CFX_XMLNode *pXMLContainer, CXFA_TextProvider *pTextProvider)
FX_ARGB GetColor(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
int32_t GetVerScale(CXFA_TextProvider *pTextProvider, const CFX_CSSComputedStyle *pStyle) const
absl::optional< WideString > GetEmbeddedObj(const CXFA_TextProvider *pTextProvider, const CFX_XMLNode *pXMLNode)
RetainPtr< CFX_CSSComputedStyle > ComputeStyle(const CFX_XMLNode *pXMLNode, RetainPtr< const CFX_CSSComputedStyle > pParentStyle)
CXFA_Para * GetParaIfExists()
CXFA_Font * GetFontIfExists()
void Append(uint32_t dwAlign, float fTabstops)
WideString & operator=(WideString &&that) noexcept
bool EqualsASCIINoCase(ByteStringView that) const
CharType operator[](const size_t index) const
WideString & operator=(const WideString &that)
static WideString FromASCII(ByteStringView str)
bool EqualsASCII(ByteStringView that) const
#define FXFONT_FORCE_BOLD