Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qrhid3d12.cpp
Go to the documentation of this file.
1// Copyright (C) 2023 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:default
4
5#include "qrhid3d12_p.h"
6#include <qmath.h>
7#include <QtCore/private/qsystemerror_p.h>
8#include <comdef.h>
10#include "cs_mipmap_p.h"
11#include "cs_mipmap_3d_p.h"
12
13#if __has_include(<pix.h>)
14#include <pix.h>
15#define QRHI_D3D12_HAS_OLD_PIX
16#endif
17
18#ifdef __ID3D12Device2_INTERFACE_DEFINED__
19
20QT_BEGIN_NAMESPACE
21
22/*
23 Direct 3D 12 backend.
24*/
25
26/*!
27 \class QRhiD3D12InitParams
28 \inmodule QtGuiPrivate
29 \inheaderfile rhi/qrhi.h
30 \brief Direct3D 12 specific initialization parameters.
31
32 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
33 for details.
34
35 A D3D12-based QRhi needs no special parameters for initialization. If
36 desired, enableDebugLayer can be set to \c true to enable the Direct3D
37 debug layer. This can be useful during development, but should be avoided
38 in production builds.
39
40 \badcode
41 QRhiD3D12InitParams params;
42 params.enableDebugLayer = true;
43 rhi = QRhi::create(QRhi::D3D12, &params);
44 \endcode
45
46 \note QRhiSwapChain should only be used in combination with QWindow
47 instances that have their surface type set to QSurface::Direct3DSurface.
48
49 \section2 Working with existing Direct3D 12 devices
50
51 When interoperating with another graphics engine, it may be necessary to
52 get a QRhi instance that uses the same Direct3D device. This can be
53 achieved by passing a pointer to a QRhiD3D12NativeHandles to
54 QRhi::create(). QRhi does not take ownership of any of the external
55 objects.
56
57 Sometimes, for example when using QRhi in combination with OpenXR, one will
58 want to specify which adapter to use, and optionally, which feature level
59 to request on the device, while leaving the device creation to QRhi. This
60 is achieved by leaving the device pointer set to null, while specifying the
61 adapter LUID and feature level.
62
63 Optionally the ID3D12CommandQueue can be specified as well, by setting \c
64 commandQueue to a non-null value.
65 */
66
67/*!
68 \variable QRhiD3D12InitParams::enableDebugLayer
69
70 When set to true, the debug layer is enabled, if installed and available.
71 The default value is false.
72*/
73
74/*!
75 \class QRhiD3D12NativeHandles
76 \inmodule QtGuiPrivate
77 \inheaderfile rhi/qrhi.h
78 \brief Holds the D3D12 device used by the QRhi.
79
80 \note The class uses \c{void *} as the type since including the COM-based
81 \c{d3d12.h} headers is not acceptable here. The actual types are
82 \c{ID3D12Device *} and \c{ID3D12CommandQueue *}.
83
84 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
85 for details.
86 */
87
88/*!
89 \variable QRhiD3D12NativeHandles::dev
90
91 Points to a
92 \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device}{ID3D12Device}
93 or left set to \nullptr if no existing device is to be imported.
94*/
95
/*!
    \variable QRhiD3D12NativeHandles::minimumFeatureLevel

    Specifies the \b minimum feature level passed to
    \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-d3d12createdevice}{D3D12CreateDevice()}.
    When not set, \c{D3D_FEATURE_LEVEL_11_0} is used. See
    \l{https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels}{this
    page} for details.

    Relevant only when QRhi creates the device, ignored when importing a
    device.
*/
108
/*!
    \variable QRhiD3D12NativeHandles::adapterLuidLow

    The low part of the local identifier (LUID) of the DXGI adapter to use.
    Relevant only when QRhi creates the device, ignored when importing a
    device.
*/
116
/*!
    \variable QRhiD3D12NativeHandles::adapterLuidHigh

    The high part of the local identifier (LUID) of the DXGI adapter to use.
    Relevant only when QRhi creates the device, ignored when importing a
    device.
*/
124
/*!
    \variable QRhiD3D12NativeHandles::commandQueue

    When set, must point to a
    \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12commandqueue}{ID3D12CommandQueue}.
    This makes it possible to import a command queue as well, in addition to
    a device.
*/
133
134/*!
135 \class QRhiD3D12CommandBufferNativeHandles
136 \inmodule QtGuiPrivate
137 \inheaderfile rhi/qrhi.h
138 \brief Holds the ID3D12GraphicsCommandList1 object that is backing a QRhiCommandBuffer.
139
140 \note The command list object is only guaranteed to be valid, and
141 in recording state, while recording a frame. That is, between a
142 \l{QRhi::beginFrame()}{beginFrame()} - \l{QRhi::endFrame()}{endFrame()} or
143 \l{QRhi::beginOffscreenFrame()}{beginOffscreenFrame()} -
144 \l{QRhi::endOffscreenFrame()}{endOffscreenFrame()} pair.
145
146 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
147 for details.
148 */
149
150/*!
151 \variable QRhiD3D12CommandBufferNativeHandles::commandList
152*/
153
154// https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels
155static const D3D_FEATURE_LEVEL MIN_FEATURE_LEVEL = D3D_FEATURE_LEVEL_11_0;
156
157QRhiD3D12::QRhiD3D12(QRhiD3D12InitParams *params, QRhiD3D12NativeHandles *importParams)
158{
159 debugLayer = params->enableDebugLayer;
160 if (importParams) {
161 if (importParams->dev) {
162 ID3D12Device *d3d12Device = reinterpret_cast<ID3D12Device *>(importParams->dev);
163 if (SUCCEEDED(d3d12Device->QueryInterface(__uuidof(ID3D12Device2), reinterpret_cast<void **>(&dev)))) {
164 // get rid of the ref added by QueryInterface
165 d3d12Device->Release();
166 importedDevice = true;
167 } else {
168 qWarning("ID3D12Device2 not supported, cannot import device");
169 }
170 }
171 if (importParams->commandQueue) {
172 cmdQueue = reinterpret_cast<ID3D12CommandQueue *>(importParams->commandQueue);
173 importedCommandQueue = true;
174 }
175 minimumFeatureLevel = D3D_FEATURE_LEVEL(importParams->minimumFeatureLevel);
176 adapterLuid.LowPart = importParams->adapterLuidLow;
177 adapterLuid.HighPart = importParams->adapterLuidHigh;
178 }
179}
180
// Rounds v up to the next multiple of byteAlign. The bit trick used here only
// gives a meaningful result when byteAlign is a power of two.
template <class Int>
inline Int aligned(Int v, Int byteAlign)
{
    const auto lowBits = byteAlign - 1;
    return (v + lowBits) & ~lowBits;
}
186
187static inline UINT calcSubresource(UINT mipSlice, UINT arraySlice, UINT mipLevels)
188{
189 return mipSlice + arraySlice * mipLevels;
190}
191
192static inline QD3D12RenderTargetData *rtData(QRhiRenderTarget *rt)
193{
194 switch (rt->resourceType()) {
195 case QRhiResource::SwapChainRenderTarget:
196 return &QRHI_RES(QD3D12SwapChainRenderTarget, rt)->d;
197 case QRhiResource::TextureRenderTarget:
198 return &QRHI_RES(QD3D12TextureRenderTarget, rt)->d;
199 break;
200 default:
201 break;
202 }
203 Q_UNREACHABLE_RETURN(nullptr);
204}
205
206bool QRhiD3D12::create(QRhi::Flags flags)
207{
208 rhiFlags = flags;
209
210 UINT factoryFlags = 0;
211 if (debugLayer)
212 factoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
213 HRESULT hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory));
214 if (FAILED(hr)) {
215 // retry without debug, if it was requested (to match D3D11 backend behavior)
216 if (debugLayer) {
217 qCDebug(QRHI_LOG_INFO, "Debug layer was requested but is not available. "
218 "Attempting to create DXGIFactory2 without it.");
219 factoryFlags &= ~DXGI_CREATE_FACTORY_DEBUG;
220 hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory));
221 }
222 if (SUCCEEDED(hr)) {
223 debugLayer = false;
224 } else {
225 qWarning("CreateDXGIFactory2() failed to create DXGI factory: %s",
226 qPrintable(QSystemError::windowsComString(hr)));
227 return false;
228 }
229 }
230
231 if (qEnvironmentVariableIsSet("QT_D3D_MAX_FRAME_LATENCY"))
232 maxFrameLatency = UINT(qMax(0, qEnvironmentVariableIntValue("QT_D3D_MAX_FRAME_LATENCY")));
233 if (maxFrameLatency != 0)
234 qCDebug(QRHI_LOG_INFO, "Using frame latency waitable object with max frame latency %u", maxFrameLatency);
235
236 supportsAllowTearing = false;
237 IDXGIFactory5 *factory5 = nullptr;
238 if (SUCCEEDED(dxgiFactory->QueryInterface(__uuidof(IDXGIFactory5), reinterpret_cast<void **>(&factory5)))) {
239 BOOL allowTearing = false;
240 if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allowTearing, sizeof(allowTearing))))
241 supportsAllowTearing = allowTearing;
242 factory5->Release();
243 }
244
245 if (debugLayer) {
246 ID3D12Debug1 *debug = nullptr;
247 if (SUCCEEDED(D3D12GetDebugInterface(__uuidof(ID3D12Debug1), reinterpret_cast<void **>(&debug)))) {
248 qCDebug(QRHI_LOG_INFO, "Enabling D3D12 debug layer");
249 debug->EnableDebugLayer();
250 debug->Release();
251 }
252 }
253
254 activeAdapter = nullptr;
255
256 if (!importedDevice) {
257 IDXGIAdapter1 *adapter;
258 int requestedAdapterIndex = -1;
259 if (qEnvironmentVariableIsSet("QT_D3D_ADAPTER_INDEX"))
260 requestedAdapterIndex = qEnvironmentVariableIntValue("QT_D3D_ADAPTER_INDEX");
261
262 if (requestedRhiAdapter)
263 adapterLuid = static_cast<QD3D12Adapter *>(requestedRhiAdapter)->luid;
264
265 // importParams or requestedRhiAdapter may specify an adapter by the luid, use that in the absence of an env.var. override.
266 if (requestedAdapterIndex < 0 && (adapterLuid.LowPart || adapterLuid.HighPart)) {
267 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
268 DXGI_ADAPTER_DESC1 desc;
269 adapter->GetDesc1(&desc);
270 adapter->Release();
271 if (desc.AdapterLuid.LowPart == adapterLuid.LowPart
272 && desc.AdapterLuid.HighPart == adapterLuid.HighPart)
273 {
274 requestedAdapterIndex = adapterIndex;
275 break;
276 }
277 }
278 }
279
280 if (requestedAdapterIndex < 0 && flags.testFlag(QRhi::PreferSoftwareRenderer)) {
281 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
282 DXGI_ADAPTER_DESC1 desc;
283 adapter->GetDesc1(&desc);
284 adapter->Release();
285 if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
286 requestedAdapterIndex = adapterIndex;
287 break;
288 }
289 }
290 }
291
292 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
293 DXGI_ADAPTER_DESC1 desc;
294 adapter->GetDesc1(&desc);
295 const QString name = QString::fromUtf16(reinterpret_cast<char16_t *>(desc.Description));
296 qCDebug(QRHI_LOG_INFO, "Adapter %d: '%s' (vendor 0x%X device 0x%X flags 0x%X)",
297 adapterIndex,
298 qPrintable(name),
299 desc.VendorId,
300 desc.DeviceId,
301 desc.Flags);
302 if (!activeAdapter && (requestedAdapterIndex < 0 || requestedAdapterIndex == adapterIndex)) {
303 activeAdapter = adapter;
304 adapterLuid = desc.AdapterLuid;
305 QRhiD3D::fillDriverInfo(&driverInfoStruct, desc);
306 qCDebug(QRHI_LOG_INFO, " using this adapter");
307 } else {
308 adapter->Release();
309 }
310 }
311 if (!activeAdapter) {
312 qWarning("No adapter");
313 return false;
314 }
315
316 if (minimumFeatureLevel == 0)
317 minimumFeatureLevel = MIN_FEATURE_LEVEL;
318
319 hr = D3D12CreateDevice(activeAdapter,
320 minimumFeatureLevel,
321 __uuidof(ID3D12Device2),
322 reinterpret_cast<void **>(&dev));
323 if (FAILED(hr)) {
324 qWarning("Failed to create D3D12 device: %s", qPrintable(QSystemError::windowsComString(hr)));
325 return false;
326 }
327 } else {
328 Q_ASSERT(dev);
329 // cannot just get a IDXGIDevice from the ID3D12Device anymore, look up the adapter instead
330 adapterLuid = dev->GetAdapterLuid();
331 IDXGIAdapter1 *adapter;
332 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
333 DXGI_ADAPTER_DESC1 desc;
334 adapter->GetDesc1(&desc);
335 if (desc.AdapterLuid.LowPart == adapterLuid.LowPart
336 && desc.AdapterLuid.HighPart == adapterLuid.HighPart)
337 {
338 activeAdapter = adapter;
339 QRhiD3D::fillDriverInfo(&driverInfoStruct, desc);
340 break;
341 } else {
342 adapter->Release();
343 }
344 }
345 if (!activeAdapter) {
346 qWarning("No adapter");
347 return false;
348 }
349 qCDebug(QRHI_LOG_INFO, "Using imported device %p", dev);
350 }
351
352 QDxgiVSyncService::instance()->refAdapter(adapterLuid);
353
354 if (debugLayer) {
355 ID3D12InfoQueue *infoQueue;
356 if (SUCCEEDED(dev->QueryInterface(__uuidof(ID3D12InfoQueue), reinterpret_cast<void **>(&infoQueue)))) {
357 if (qEnvironmentVariableIntValue("QT_D3D_DEBUG_BREAK")) {
358 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true);
359 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true);
360 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
361 }
362 D3D12_INFO_QUEUE_FILTER filter = {};
363 D3D12_MESSAGE_ID suppressedMessages[2] = {
364 // there is no way of knowing the clear color upfront
365 D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
366 // we have no control over viewport and scissor rects
367 D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE
368 };
369 filter.DenyList.NumIDs = 2;
370 filter.DenyList.pIDList = suppressedMessages;
371 // Setting the filter would enable Info messages (e.g. about
372 // resource creation) which we don't need.
373 D3D12_MESSAGE_SEVERITY infoSev = D3D12_MESSAGE_SEVERITY_INFO;
374 filter.DenyList.NumSeverities = 1;
375 filter.DenyList.pSeverityList = &infoSev;
376 infoQueue->PushStorageFilter(&filter);
377 infoQueue->Release();
378 }
379 }
380
381 if (!importedCommandQueue) {
382 D3D12_COMMAND_QUEUE_DESC queueDesc = {};
383 queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
384 queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
385 hr = dev->CreateCommandQueue(&queueDesc, __uuidof(ID3D12CommandQueue), reinterpret_cast<void **>(&cmdQueue));
386 if (FAILED(hr)) {
387 qWarning("Failed to create command queue: %s", qPrintable(QSystemError::windowsComString(hr)));
388 return false;
389 }
390 }
391
392 hr = dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), reinterpret_cast<void **>(&fullFence));
393 if (FAILED(hr)) {
394 qWarning("Failed to create fence: %s", qPrintable(QSystemError::windowsComString(hr)));
395 return false;
396 }
397 fullFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
398 fullFenceCounter = 0;
399
400 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
401 hr = dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
402 __uuidof(ID3D12CommandAllocator),
403 reinterpret_cast<void **>(&cmdAllocators[i]));
404 if (FAILED(hr)) {
405 qWarning("Failed to create command allocator: %s", qPrintable(QSystemError::windowsComString(hr)));
406 return false;
407 }
408 }
409
410 if (!vma.create(dev, activeAdapter)) {
411 qWarning("Failed to initialize graphics memory suballocator");
412 return false;
413 }
414
415 if (!rtvPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, "main RTV pool")) {
416 qWarning("Could not create RTV pool");
417 return false;
418 }
419
420 if (!dsvPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, "main DSV pool")) {
421 qWarning("Could not create DSV pool");
422 return false;
423 }
424
425 if (!cbvSrvUavPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, "main CBV-SRV-UAV pool")) {
426 qWarning("Could not create CBV-SRV-UAV pool");
427 return false;
428 }
429
430 resourcePool.create("main resource pool");
431 pipelinePool.create("main pipeline pool");
432 rootSignaturePool.create("main root signature pool");
433 releaseQueue.create(&resourcePool, &pipelinePool, &rootSignaturePool);
434 barrierGen.create(&resourcePool);
435
436 if (!samplerMgr.create(dev)) {
437 qWarning("Could not create sampler pool and shader-visible sampler heap");
438 return false;
439 }
440
441 if (!mipmapGen.create(this)) {
442 qWarning("Could not initialize mipmap generator");
443 return false;
444 }
445
446 if (!mipmapGen3D.create(this)) {
447 qWarning("Could not initialize 3D texture mipmap generator");
448 return false;
449 }
450
451 const qint32 smallStagingSize = aligned(SMALL_STAGING_AREA_BYTES_PER_FRAME, QD3D12StagingArea::ALIGNMENT);
452 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
453 if (!smallStagingAreas[i].create(this, smallStagingSize, D3D12_HEAP_TYPE_UPLOAD)) {
454 qWarning("Could not create host-visible staging area");
455 return false;
456 }
457 QString decoratedName = QLatin1String("Small staging area buffer/");
458 decoratedName += QString::number(i);
459 smallStagingAreas[i].mem.buffer->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16()));
460 }
461
462 if (!shaderVisibleCbvSrvUavHeap.create(dev,
463 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
464 SHADER_VISIBLE_CBV_SRV_UAV_HEAP_PER_FRAME_START_SIZE))
465 {
466 qWarning("Could not create first shader-visible CBV/SRV/UAV heap");
467 return false;
468 }
469
470 if (flags.testFlag(QRhi::EnableTimestamps)) {
471 static bool wantsStablePowerState = qEnvironmentVariableIntValue("QT_D3D_STABLE_POWER_STATE");
472 //
473 // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-setstablepowerstate
474 //
475 // NB! This is a _global_ setting, affecting other processes (and 3D
476 // APIs such as Vulkan), as long as this application is running. Hence
477 // making it an env.var. for now. Never enable it in production. But
478 // extremely useful for the GPU timings with NVIDIA at least; the
479 // timestamps become stable and smooth, making the number readable and
480 // actually useful e.g. in Quick 3D's DebugView when this is enabled.
481 // (otherwise the number's all over the place)
482 //
483 // See also
484 // https://developer.nvidia.com/blog/advanced-api-performance-setstablepowerstate/
485 // for possible other approaches.
486 //
487 if (wantsStablePowerState)
488 dev->SetStablePowerState(TRUE);
489
490 hr = cmdQueue->GetTimestampFrequency(&timestampTicksPerSecond);
491 if (FAILED(hr)) {
492 qWarning("Failed to query timestamp frequency: %s",
493 qPrintable(QSystemError::windowsComString(hr)));
494 return false;
495 }
496 if (!timestampQueryHeap.create(dev, QD3D12_FRAMES_IN_FLIGHT * 2, D3D12_QUERY_HEAP_TYPE_TIMESTAMP)) {
497 qWarning("Failed to create timestamp query pool");
498 return false;
499 }
500 const quint32 readbackBufSize = QD3D12_FRAMES_IN_FLIGHT * 2 * sizeof(quint64);
501 if (!timestampReadbackArea.create(this, readbackBufSize, D3D12_HEAP_TYPE_READBACK)) {
502 qWarning("Failed to create timestamp readback buffer");
503 return false;
504 }
505 timestampReadbackArea.mem.buffer->SetName(L"Timestamp readback buffer");
506 memset(timestampReadbackArea.mem.p, 0, readbackBufSize);
507 }
508
509 caps = {};
510 D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};
511 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)))) {
512 caps.multiView = options3.ViewInstancingTier != D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
513 // https://microsoft.github.io/DirectX-Specs/d3d/RelaxedCasting.html
514 caps.textureViewFormat = options3.CastingFullyTypedFormatSupported;
515 }
516
517#ifdef QRHI_D3D12_CL5_AVAILABLE
518 D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {};
519 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)))) {
520 caps.vrs = options6.VariableShadingRateTier != D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED;
521 caps.vrsMap = options6.VariableShadingRateTier == D3D12_VARIABLE_SHADING_RATE_TIER_2;
522 caps.vrsAdditionalRates = options6.AdditionalShadingRatesSupported;
523 shadingRateImageTileSize = options6.ShadingRateImageTileSize;
524 }
525#else
526 caps.vrs = false;
527 caps.vrsMap = false;
528 caps.vrsAdditionalRates = false;
529#endif
530
531 {
532 D3D12_INDIRECT_ARGUMENT_DESC arg = {};
533 arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW;
534
535 D3D12_COMMAND_SIGNATURE_DESC sigDesc = {};
536 sigDesc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS);
537 sigDesc.NumArgumentDescs = 1;
538 sigDesc.pArgumentDescs = &arg;
539
540 hr = dev->CreateCommandSignature(&sigDesc, nullptr, IID_PPV_ARGS(&drawCommandSignature));
541 if (FAILED(hr)) {
542 qWarning("Failed to create draw command signature: %s", qPrintable(QSystemError::windowsComString(hr)));
543 return false;
544 }
545 }
546
547 {
548 D3D12_INDIRECT_ARGUMENT_DESC arg = {};
549 arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
550
551 D3D12_COMMAND_SIGNATURE_DESC sigDesc = {};
552 sigDesc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
553 sigDesc.NumArgumentDescs = 1;
554 sigDesc.pArgumentDescs = &arg;
555
556 hr = dev->CreateCommandSignature(&sigDesc, nullptr, IID_PPV_ARGS(&drawIndexedCommandSignature));
557 if (FAILED(hr)) {
558 qWarning("Failed to create draw indexed command signature: %s", qPrintable(QSystemError::windowsComString(hr)));
559 return false;
560 }
561 }
562
563 deviceLost = false;
564 offscreenActive = false;
565
566 nativeHandlesStruct.dev = dev;
567 nativeHandlesStruct.minimumFeatureLevel = minimumFeatureLevel;
568 nativeHandlesStruct.adapterLuidLow = adapterLuid.LowPart;
569 nativeHandlesStruct.adapterLuidHigh = adapterLuid.HighPart;
570 nativeHandlesStruct.commandQueue = cmdQueue;
571
572 return true;
573}
574
575void QRhiD3D12::destroy()
576{
577 if (!deviceLost && fullFence && fullFenceEvent)
578 waitGpu();
579
580 releaseQueue.releaseAll();
581
582 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
583 if (offscreenCb[i]) {
584 if (offscreenCb[i]->cmdList)
585 offscreenCb[i]->cmdList->Release();
586 delete offscreenCb[i];
587 offscreenCb[i] = nullptr;
588 }
589 }
590
591 timestampQueryHeap.destroy();
592 timestampReadbackArea.destroy();
593
594 shaderVisibleCbvSrvUavHeap.destroy();
595
596 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i)
597 smallStagingAreas[i].destroy();
598
599 mipmapGen.destroy();
600 mipmapGen3D.destroy();
601 samplerMgr.destroy();
602 resourcePool.destroy();
603 pipelinePool.destroy();
604 rootSignaturePool.destroy();
605 rtvPool.destroy();
606 dsvPool.destroy();
607 cbvSrvUavPool.destroy();
608
609 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
610 if (cmdAllocators[i]) {
611 cmdAllocators[i]->Release();
612 cmdAllocators[i] = nullptr;
613 }
614 }
615
616 if (fullFenceEvent) {
617 CloseHandle(fullFenceEvent);
618 fullFenceEvent = nullptr;
619 }
620
621 if (fullFence) {
622 fullFence->Release();
623 fullFence = nullptr;
624 }
625
626 if (!importedCommandQueue) {
627 if (cmdQueue) {
628 cmdQueue->Release();
629 cmdQueue = nullptr;
630 }
631 }
632
633 vma.destroy();
634
635 if (!importedDevice) {
636 if (dev) {
637 dev->Release();
638 dev = nullptr;
639 }
640 }
641
642 if (dcompDevice) {
643 dcompDevice->Release();
644 dcompDevice = nullptr;
645 }
646
647 if (activeAdapter) {
648 activeAdapter->Release();
649 activeAdapter = nullptr;
650 }
651
652 if (dxgiFactory) {
653 dxgiFactory->Release();
654 dxgiFactory = nullptr;
655 }
656
657 adapterLuid = {};
658 importedDevice = false;
659 importedCommandQueue = false;
660
661 QDxgiVSyncService::instance()->derefAdapter(adapterLuid);
662
663 if (drawCommandSignature) {
664 drawCommandSignature->Release();
665 drawCommandSignature = nullptr;
666 }
667
668 if (drawIndexedCommandSignature) {
669 drawIndexedCommandSignature->Release();
670 drawIndexedCommandSignature = nullptr;
671 }
672}
673
674QRhi::AdapterList QRhiD3D12::enumerateAdaptersBeforeCreate(QRhiNativeHandles *nativeHandles) const
675{
676 LUID requestedLuid = {};
677 if (nativeHandles) {
678 QRhiD3D12NativeHandles *h = static_cast<QRhiD3D12NativeHandles *>(nativeHandles);
679 const LUID adapterLuid = { h->adapterLuidLow, h->adapterLuidHigh };
680 if (adapterLuid.LowPart || adapterLuid.HighPart)
681 requestedLuid = adapterLuid;
682 }
683
684 IDXGIFactory2 *dxgi = nullptr;
685 if (FAILED(CreateDXGIFactory2(0, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgi))))
686 return {};
687
688 QRhi::AdapterList list;
689 IDXGIAdapter1 *adapter;
690 for (int adapterIndex = 0; dxgi->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
691 DXGI_ADAPTER_DESC1 desc;
692 adapter->GetDesc1(&desc);
693 adapter->Release();
694 if (requestedLuid.LowPart || requestedLuid.HighPart) {
695 if (desc.AdapterLuid.LowPart != requestedLuid.LowPart
696 || desc.AdapterLuid.HighPart != requestedLuid.HighPart)
697 {
698 continue;
699 }
700 }
701 QD3D12Adapter *a = new QD3D12Adapter;
702 a->luid = desc.AdapterLuid;
703 QRhiD3D::fillDriverInfo(&a->adapterInfo, desc);
704 list.append(a);
705 }
706
707 dxgi->Release();
708 return list;
709}
710
711QRhiDriverInfo QD3D12Adapter::info() const
712{
713 return adapterInfo;
714}
715
716QList<int> QRhiD3D12::supportedSampleCounts() const
717{
718 return { 1, 2, 4, 8 };
719}
720
721QList<QSize> QRhiD3D12::supportedShadingRates(int sampleCount) const
722{
723 QList<QSize> sizes;
724 switch (sampleCount) {
725 case 0:
726 case 1:
727 if (caps.vrsAdditionalRates) {
728 sizes.append(QSize(4, 4));
729 sizes.append(QSize(4, 2));
730 sizes.append(QSize(2, 4));
731 }
732 sizes.append(QSize(2, 2));
733 sizes.append(QSize(2, 1));
734 sizes.append(QSize(1, 2));
735 break;
736 case 2:
737 if (caps.vrsAdditionalRates)
738 sizes.append(QSize(2, 4));
739 sizes.append(QSize(2, 2));
740 sizes.append(QSize(2, 1));
741 sizes.append(QSize(1, 2));
742 break;
743 case 4:
744 sizes.append(QSize(2, 2));
745 sizes.append(QSize(2, 1));
746 sizes.append(QSize(1, 2));
747 break;
748 default:
749 break;
750 }
751 sizes.append(QSize(1, 1));
752 return sizes;
753}
754
755QRhiSwapChain *QRhiD3D12::createSwapChain()
756{
757 return new QD3D12SwapChain(this);
758}
759
760QRhiBuffer *QRhiD3D12::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, quint32 size)
761{
762 return new QD3D12Buffer(this, type, usage, size);
763}
764
765int QRhiD3D12::ubufAlignment() const
766{
767 return D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; // 256
768}
769
770bool QRhiD3D12::isYUpInFramebuffer() const
771{
772 return false;
773}
774
775bool QRhiD3D12::isYUpInNDC() const
776{
777 return true;
778}
779
780bool QRhiD3D12::isClipDepthZeroToOne() const
781{
782 return true;
783}
784
785QMatrix4x4 QRhiD3D12::clipSpaceCorrMatrix() const
786{
787 // Like with Vulkan, but Y is already good.
788
789 static QMatrix4x4 m;
790 if (m.isIdentity()) {
791 // NB the ctor takes row-major
792 m = QMatrix4x4(1.0f, 0.0f, 0.0f, 0.0f,
793 0.0f, 1.0f, 0.0f, 0.0f,
794 0.0f, 0.0f, 0.5f, 0.5f,
795 0.0f, 0.0f, 0.0f, 1.0f);
796 }
797 return m;
798}
799
800bool QRhiD3D12::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture::Flags flags) const
801{
802 Q_UNUSED(flags);
803
804 if (format >= QRhiTexture::ETC2_RGB8 && format <= QRhiTexture::ASTC_12x12)
805 return false;
806
807 return true;
808}
809
810bool QRhiD3D12::isFeatureSupported(QRhi::Feature feature) const
811{
812 switch (feature) {
813 case QRhi::MultisampleTexture:
814 return true;
815 case QRhi::MultisampleRenderBuffer:
816 return true;
817 case QRhi::DebugMarkers:
818#ifdef QRHI_D3D12_HAS_OLD_PIX
819 return true;
820#else
821 return false;
822#endif
823 case QRhi::Timestamps:
824 return true;
825 case QRhi::Instancing:
826 return true;
827 case QRhi::CustomInstanceStepRate:
828 return true;
829 case QRhi::PrimitiveRestart:
830 return true;
831 case QRhi::NonDynamicUniformBuffers:
832 return false;
833 case QRhi::NonFourAlignedEffectiveIndexBufferOffset:
834 return true;
835 case QRhi::NPOTTextureRepeat:
836 return true;
837 case QRhi::RedOrAlpha8IsRed:
838 return true;
839 case QRhi::ElementIndexUint:
840 return true;
841 case QRhi::Compute:
842 return true;
843 case QRhi::WideLines:
844 return false;
845 case QRhi::VertexShaderPointSize:
846 return false;
847 case QRhi::BaseVertex:
848 return true;
849 case QRhi::BaseInstance:
850 return true;
851 case QRhi::TriangleFanTopology:
852 return false;
853 case QRhi::ReadBackNonUniformBuffer:
854 return true;
855 case QRhi::ReadBackNonBaseMipLevel:
856 return true;
857 case QRhi::TexelFetch:
858 return true;
859 case QRhi::RenderToNonBaseMipLevel:
860 return true;
861 case QRhi::IntAttributes:
862 return true;
863 case QRhi::ScreenSpaceDerivatives:
864 return true;
865 case QRhi::ReadBackAnyTextureFormat:
866 return true;
867 case QRhi::PipelineCacheDataLoadSave:
868 return false; // ###
869 case QRhi::ImageDataStride:
870 return true;
871 case QRhi::RenderBufferImport:
872 return false;
873 case QRhi::ThreeDimensionalTextures:
874 return true;
875 case QRhi::RenderTo3DTextureSlice:
876 return true;
877 case QRhi::TextureArrays:
878 return true;
879 case QRhi::Tessellation:
880 return true;
881 case QRhi::GeometryShader:
882 return true;
883 case QRhi::TextureArrayRange:
884 return true;
885 case QRhi::NonFillPolygonMode:
886 return true;
887 case QRhi::OneDimensionalTextures:
888 return true;
889 case QRhi::OneDimensionalTextureMipmaps:
890 return false; // we generate mipmaps ourselves with compute and this is not implemented
891 case QRhi::HalfAttributes:
892 return true;
893 case QRhi::RenderToOneDimensionalTexture:
894 return true;
895 case QRhi::ThreeDimensionalTextureMipmaps:
896 return true;
897 case QRhi::MultiView:
898 return caps.multiView;
899 case QRhi::TextureViewFormat:
900 return caps.textureViewFormat;
901 case QRhi::ResolveDepthStencil:
902 // there is no Multisample Resolve support for depth/stencil formats
903 // https://learn.microsoft.com/en-us/windows/win32/direct3ddxgi/hardware-support-for-direct3d-12-1-formats
904 return false;
905 case QRhi::VariableRateShading:
906 return caps.vrs;
907 case QRhi::VariableRateShadingMap:
908 case QRhi::VariableRateShadingMapWithTexture:
909 return caps.vrsMap;
910 case QRhi::PerRenderTargetBlending:
911 case QRhi::SampleVariables:
912 return true;
913 case QRhi::InstanceIndexIncludesBaseInstance:
914 return false;
915 case QRhi::DepthClamp:
916 return true;
917 case QRhi::DrawIndirect:
918 return drawCommandSignature != nullptr && drawIndexedCommandSignature != nullptr;
919 case QRhi::DrawIndirectMulti:
920 return drawCommandSignature != nullptr && drawIndexedCommandSignature != nullptr;
921 case QRhi::ShaderDrawParameters:
922 return false;
923 }
924 return false;
925}
926
927int QRhiD3D12::resourceLimit(QRhi::ResourceLimit limit) const
928{
929 switch (limit) {
930 case QRhi::TextureSizeMin:
931 return 1;
932 case QRhi::TextureSizeMax:
933 return 16384;
934 case QRhi::MaxColorAttachments:
935 return 8;
936 case QRhi::FramesInFlight:
937 return QD3D12_FRAMES_IN_FLIGHT;
938 case QRhi::MaxAsyncReadbackFrames:
939 return QD3D12_FRAMES_IN_FLIGHT;
940 case QRhi::MaxThreadGroupsPerDimension:
941 return 65535;
942 case QRhi::MaxThreadsPerThreadGroup:
943 return 1024;
944 case QRhi::MaxThreadGroupX:
945 return 1024;
946 case QRhi::MaxThreadGroupY:
947 return 1024;
948 case QRhi::MaxThreadGroupZ:
949 return 1024;
950 case QRhi::TextureArraySizeMax:
951 return 2048;
952 case QRhi::MaxUniformBufferRange:
953 return 65536;
954 case QRhi::MaxVertexInputs:
955 return 32;
956 case QRhi::MaxVertexOutputs:
957 return 32;
958 case QRhi::ShadingRateImageTileSize:
959 return shadingRateImageTileSize;
960 }
961 return 0;
962}
963
964const QRhiNativeHandles *QRhiD3D12::nativeHandles()
965{
966 return &nativeHandlesStruct;
967}
968
969QRhiDriverInfo QRhiD3D12::driverInfo() const
970{
971 return driverInfoStruct;
972}
973
974QRhiStats QRhiD3D12::statistics()
975{
976 QRhiStats result;
977 result.totalPipelineCreationTime = totalPipelineCreationTime();
978
979 D3D12MA::Budget budgets[2]; // [gpu, system] with discreet GPU or [shared, nothing] with UMA
980 vma.getBudget(&budgets[0], &budgets[1]);
981 for (int i = 0; i < 2; ++i) {
982 const D3D12MA::Statistics &stats(budgets[i].Stats);
983 result.blockCount += stats.BlockCount;
984 result.allocCount += stats.AllocationCount;
985 result.usedBytes += stats.AllocationBytes;
986 result.unusedBytes += stats.BlockBytes - stats.AllocationBytes;
987 result.totalUsageBytes += budgets[i].UsageBytes;
988 }
989
990 return result;
991}
992
993bool QRhiD3D12::makeThreadLocalNativeContextCurrent()
994{
995 // not applicable
996 return false;
997}
998
999void QRhiD3D12::setQueueSubmitParams(QRhiNativeHandles *)
1000{
1001 // not applicable
1002}
1003
1004void QRhiD3D12::releaseCachedResources()
1005{
1006 shaderBytecodeCache.data.clear();
1007}
1008
1009bool QRhiD3D12::isDeviceLost() const
1010{
1011 return deviceLost;
1012}
1013
1014QByteArray QRhiD3D12::pipelineCacheData()
1015{
1016 return {};
1017}
1018
1019void QRhiD3D12::setPipelineCacheData(const QByteArray &data)
1020{
1021 Q_UNUSED(data);
1022}
1023
1024QRhiRenderBuffer *QRhiD3D12::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize,
1025 int sampleCount, QRhiRenderBuffer::Flags flags,
1026 QRhiTexture::Format backingFormatHint)
1027{
1028 return new QD3D12RenderBuffer(this, type, pixelSize, sampleCount, flags, backingFormatHint);
1029}
1030
1031QRhiTexture *QRhiD3D12::createTexture(QRhiTexture::Format format,
1032 const QSize &pixelSize, int depth, int arraySize,
1033 int sampleCount, QRhiTexture::Flags flags)
1034{
1035 return new QD3D12Texture(this, format, pixelSize, depth, arraySize, sampleCount, flags);
1036}
1037
1038QRhiSampler *QRhiD3D12::createSampler(QRhiSampler::Filter magFilter, QRhiSampler::Filter minFilter,
1039 QRhiSampler::Filter mipmapMode,
1040 QRhiSampler::AddressMode u, QRhiSampler::AddressMode v, QRhiSampler::AddressMode w)
1041{
1042 return new QD3D12Sampler(this, magFilter, minFilter, mipmapMode, u, v, w);
1043}
1044
1045QRhiTextureRenderTarget *QRhiD3D12::createTextureRenderTarget(const QRhiTextureRenderTargetDescription &desc,
1046 QRhiTextureRenderTarget::Flags flags)
1047{
1048 return new QD3D12TextureRenderTarget(this, desc, flags);
1049}
1050
1051QRhiShadingRateMap *QRhiD3D12::createShadingRateMap()
1052{
1053 return new QD3D12ShadingRateMap(this);
1054}
1055
1056QRhiGraphicsPipeline *QRhiD3D12::createGraphicsPipeline()
1057{
1058 return new QD3D12GraphicsPipeline(this);
1059}
1060
1061QRhiComputePipeline *QRhiD3D12::createComputePipeline()
1062{
1063 return new QD3D12ComputePipeline(this);
1064}
1065
1066QRhiShaderResourceBindings *QRhiD3D12::createShaderResourceBindings()
1067{
1068 return new QD3D12ShaderResourceBindings(this);
1069}
1070
1071void QRhiD3D12::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps)
1072{
1073 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1074 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1075 QD3D12GraphicsPipeline *psD = QRHI_RES(QD3D12GraphicsPipeline, ps);
1076 const bool pipelineChanged = cbD->currentGraphicsPipeline != psD || cbD->currentPipelineGeneration != psD->generation;
1077
1078 if (pipelineChanged) {
1079 cbD->currentGraphicsPipeline = psD;
1080 cbD->currentComputePipeline = nullptr;
1081 cbD->currentPipelineGeneration = psD->generation;
1082
1083 if (QD3D12Pipeline *pipeline = pipelinePool.lookupRef(psD->handle)) {
1084 Q_ASSERT(pipeline->type == QD3D12Pipeline::Graphics);
1085 cbD->cmdList->SetPipelineState(pipeline->pso);
1086 if (QD3D12RootSignature *rs = rootSignaturePool.lookupRef(psD->rootSigHandle))
1087 cbD->cmdList->SetGraphicsRootSignature(rs->rootSig);
1088 }
1089
1090 cbD->cmdList->IASetPrimitiveTopology(psD->topology);
1091
1092 if (psD->viewInstanceMask)
1093 cbD->cmdList->SetViewInstanceMask(psD->viewInstanceMask);
1094
1095 if (cbD->hasCustomScissorSet && !psD->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor))
1096 setDefaultScissor(cbD);
1097 }
1098}
1099
1100void QD3D12CommandBuffer::visitUniformBuffer(QD3D12Stage s,
1101 const QRhiShaderResourceBinding::Data::UniformBufferData &d,
1102 int,
1103 int binding,
1104 int dynamicOffsetCount,
1105 const QRhiCommandBuffer::DynamicOffset *dynamicOffsets)
1106{
1107 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf);
1108 quint32 offset = d.offset;
1109 if (d.hasDynamicOffset) {
1110 for (int i = 0; i < dynamicOffsetCount; ++i) {
1111 const QRhiCommandBuffer::DynamicOffset &dynOfs(dynamicOffsets[i]);
1112 if (dynOfs.first == binding) {
1113 Q_ASSERT(aligned(dynOfs.second, 256u) == dynOfs.second);
1114 offset += dynOfs.second;
1115 }
1116 }
1117 }
1118 QRHI_RES_RHI(QRhiD3D12);
1119 visitorData.cbufs[s].append({ bufD->handles[rhiD->currentFrameSlot], offset });
1120}
1121
1122void QD3D12CommandBuffer::visitTexture(QD3D12Stage s,
1123 const QRhiShaderResourceBinding::TextureAndSampler &d,
1124 int)
1125{
1126 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex);
1127 visitorData.srvs[s].append(texD->srv);
1128}
1129
1130void QD3D12CommandBuffer::visitSampler(QD3D12Stage s,
1131 const QRhiShaderResourceBinding::TextureAndSampler &d,
1132 int)
1133{
1134 QD3D12Sampler *samplerD = QRHI_RES(QD3D12Sampler, d.sampler);
1135 visitorData.samplers[s].append(samplerD->lookupOrCreateShaderVisibleDescriptor());
1136}
1137
1138void QD3D12CommandBuffer::visitStorageBuffer(QD3D12Stage s,
1139 const QRhiShaderResourceBinding::Data::StorageBufferData &d,
1140 QD3D12ShaderResourceVisitor::StorageOp,
1141 int)
1142{
1143 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf);
1144 // SPIRV-Cross generated HLSL uses RWByteAddressBuffer
1145 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
1146 uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
1147 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
1148 uavDesc.Buffer.FirstElement = d.offset / 4;
1149 uavDesc.Buffer.NumElements = aligned(bufD->m_size - d.offset, 4u) / 4;
1150 uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
1151 visitorData.uavs[s].append({ bufD->handles[0], uavDesc });
1152}
1153
1154void QD3D12CommandBuffer::visitStorageImage(QD3D12Stage s,
1155 const QRhiShaderResourceBinding::Data::StorageImageData &d,
1156 QD3D12ShaderResourceVisitor::StorageOp,
1157 int)
1158{
1159 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex);
1160 const bool isCube = texD->m_flags.testFlag(QRhiTexture::CubeMap);
1161 const bool isArray = texD->m_flags.testFlag(QRhiTexture::TextureArray);
1162 const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
1163 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
1164 uavDesc.Format = texD->rtFormat;
1165 if (isCube) {
1166 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
1167 uavDesc.Texture2DArray.MipSlice = UINT(d.level);
1168 uavDesc.Texture2DArray.FirstArraySlice = 0;
1169 uavDesc.Texture2DArray.ArraySize = 6;
1170 } else if (isArray) {
1171 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
1172 uavDesc.Texture2DArray.MipSlice = UINT(d.level);
1173 uavDesc.Texture2DArray.FirstArraySlice = 0;
1174 uavDesc.Texture2DArray.ArraySize = UINT(qMax(0, texD->m_arraySize));
1175 } else if (is3D) {
1176 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
1177 uavDesc.Texture3D.MipSlice = UINT(d.level);
1178 uavDesc.Texture3D.WSize = UINT(-1);
1179 } else {
1180 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
1181 uavDesc.Texture2D.MipSlice = UINT(d.level);
1182 }
1183 visitorData.uavs[s].append({ texD->handle, uavDesc });
1184}
1185
// Prepares and binds the shader resources in \a srb for the pipeline that is
// currently set on \a cb. When \a srb is null, the m_shaderResourceBindings
// of the current graphics pipeline (or else of the compute pipeline) is used.
// The function first walks every binding to run pending host writes on
// uniform buffers and to record transition/UAV barriers, and then, if the
// pipeline, the srb or its generation changed or the srb has dynamic offsets,
// sets the root parameters on the command list. \a dynamicOffsets (with
// \a dynamicOffsetCount entries) is forwarded to the uniform buffer visitor.
1186void QRhiD3D12::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBindings *srb,
1187 int dynamicOffsetCount,
1188 const QRhiCommandBuffer::DynamicOffset *dynamicOffsets)
1189{
1190 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1191 Q_ASSERT(cbD->recordingPass != QD3D12CommandBuffer::NoPass);
1192 QD3D12GraphicsPipeline *gfxPsD = QRHI_RES(QD3D12GraphicsPipeline, cbD->currentGraphicsPipeline);
1193 QD3D12ComputePipeline *compPsD = QRHI_RES(QD3D12ComputePipeline, cbD->currentComputePipeline);
1194
 // No srb given: fall back to the one stored on the active pipeline.
1195 if (!srb) {
1196 if (gfxPsD)
1197 srb = gfxPsD->m_shaderResourceBindings;
1198 else
1199 srb = compPsD->m_shaderResourceBindings;
1200 }
1201
1202 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, srb);
1203
 // The srb records which pipeline it was last used with; a different
 // pipeline forces the root parameters to be set again further below.
1204 bool pipelineChanged = false;
1205 if (gfxPsD) {
1206 pipelineChanged = srbD->lastUsedGraphicsPipeline != gfxPsD;
1207 srbD->lastUsedGraphicsPipeline = gfxPsD;
1208 } else {
1209 pipelineChanged = srbD->lastUsedComputePipeline != compPsD;
1210 srbD->lastUsedComputePipeline = compPsD;
1211 }
1212
 // First pass over the bindings: sanity checks, host writes and barriers.
 // This runs on every call, regardless of whether rebinding is needed.
1213 for (int i = 0, ie = srbD->m_bindings.size(); i != ie; ++i) {
1214 const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings[i]);
1215 switch (b->type) {
1216 case QRhiShaderResourceBinding::UniformBuffer:
1217 {
1218 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, b->u.ubuf.buf);
1219 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer));
1220 Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
1221 sanityCheckResourceOwnership(bufD);
1222 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
1223 }
1224 break;
1225 case QRhiShaderResourceBinding::SampledTexture:
1226 case QRhiShaderResourceBinding::Texture:
1227 case QRhiShaderResourceBinding::Sampler:
1228 {
1229 const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData *data = &b->u.stex;
1230 for (int elem = 0; elem < data->count; ++elem) {
1231 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, data->texSamplers[elem].tex);
1232 QD3D12Sampler *samplerD = QRHI_RES(QD3D12Sampler, data->texSamplers[elem].sampler);
1233 // We use the same code path for both combined and separate
1234 // images and samplers, so tex or sampler (but not both) can be
1235 // null here.
1236 Q_ASSERT(texD || samplerD);
1237 sanityCheckResourceOwnership(texD);
1238 sanityCheckResourceOwnership(samplerD);
1239 if (texD) {
 // Choose the target state from the binding's stage
 // visibility: fragment only, fragment among others, or
 // no fragment stage at all.
1240 UINT state = 0;
1241 if (b->stage == QRhiShaderResourceBinding::FragmentStage) {
1242 state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
1243 } else if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
1244 state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
1245 } else {
1246 state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
1247 }
1248 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATES(state));
1249 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1250 }
1251 }
1252 }
1253 break;
1254 case QRhiShaderResourceBinding::ImageLoad:
1255 case QRhiShaderResourceBinding::ImageStore:
1256 case QRhiShaderResourceBinding::ImageLoadStore:
1257 {
1258 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, b->u.simage.tex);
1259 sanityCheckResourceOwnership(texD);
1260 if (QD3D12Resource *res = resourcePool.lookupRef(texD->handle)) {
 // uavUsage holds the read/write flags stored by the previous
 // storage binding of this resource; a UAV barrier is issued
 // when that access or the new one involves a write.
1261 if (res->uavUsage) {
1262 if (res->uavUsage & QD3D12Resource::UavUsageWrite) {
1263 // RaW or WaW
1264 barrierGen.enqueueUavBarrier(cbD, texD->handle);
1265 } else {
1266 if (b->type == QRhiShaderResourceBinding::ImageStore
1267 || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1268 {
1269 // WaR or WaW
1270 barrierGen.enqueueUavBarrier(cbD, texD->handle);
1271 }
1272 }
1273 }
 // Replace the stored usage with the access type of this binding.
1274 res->uavUsage = 0;
1275 if (b->type == QRhiShaderResourceBinding::ImageLoad || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1276 res->uavUsage |= QD3D12Resource::UavUsageRead;
1277 if (b->type == QRhiShaderResourceBinding::ImageStore || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1278 res->uavUsage |= QD3D12Resource::UavUsageWrite;
1279 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1280 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1281 }
1282 }
1283 break;
1284 case QRhiShaderResourceBinding::BufferLoad:
1285 case QRhiShaderResourceBinding::BufferStore:
1286 case QRhiShaderResourceBinding::BufferLoadStore:
1287 {
1288 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, b->u.sbuf.buf);
1289 sanityCheckResourceOwnership(bufD);
1290 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
1291 Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
 // Same hazard tracking as for storage images, using the buffer's
 // first handle.
1292 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
1293 if (res->uavUsage) {
1294 if (res->uavUsage & QD3D12Resource::UavUsageWrite) {
1295 // RaW or WaW
1296 barrierGen.enqueueUavBarrier(cbD, bufD->handles[0]);
1297 } else {
1298 if (b->type == QRhiShaderResourceBinding::BufferStore
1299 || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1300 {
1301 // WaR or WaW
1302 barrierGen.enqueueUavBarrier(cbD, bufD->handles[0]);
1303 }
1304 }
1305 }
1306 res->uavUsage = 0;
1307 if (b->type == QRhiShaderResourceBinding::BufferLoad || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1308 res->uavUsage |= QD3D12Resource::UavUsageRead;
1309 if (b->type == QRhiShaderResourceBinding::BufferStore || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1310 res->uavUsage |= QD3D12Resource::UavUsageWrite;
1311 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1312 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1313 }
1314 }
1315 break;
1316 }
1317 }
1318
 // Second part: set the root parameters, but only when something that
 // affects them changed since the last call on this command buffer.
1319 const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb);
1320 const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation;
1321
1322 if (pipelineChanged || srbChanged || srbRebuilt || srbD->hasDynamicOffset) {
1323 const QD3D12ShaderStageData *stageData = gfxPsD ? gfxPsD->stageData.data() : &compPsD->stageData;
1324
1325 // The order of root parameters must match
1326 // QD3D12ShaderResourceBindings::createRootSignature(), meaning the
1327 // logic below must mirror that function (uniform buffers first etc.)
1328
1329 QD3D12ShaderResourceVisitor visitor(srbD, stageData, gfxPsD ? 5 : 1);
1330
1331 QD3D12CommandBuffer::VisitorData &visitorData(cbD->visitorData);
1332 visitorData = {};
1333
 // The visitor callbacks fill the per-stage lists in visitorData.
1334 using namespace std::placeholders;
1335 visitor.uniformBuffer = std::bind(&QD3D12CommandBuffer::visitUniformBuffer, cbD, _1, _2, _3, _4, dynamicOffsetCount, dynamicOffsets);
1336 visitor.texture = std::bind(&QD3D12CommandBuffer::visitTexture, cbD, _1, _2, _3);
1337 visitor.sampler = std::bind(&QD3D12CommandBuffer::visitSampler, cbD, _1, _2, _3);
1338 visitor.storageBuffer = std::bind(&QD3D12CommandBuffer::visitStorageBuffer, cbD, _1, _2, _3, _4);
1339 visitor.storageImage = std::bind(&QD3D12CommandBuffer::visitStorageImage, cbD, _1, _2, _3, _4);
1340
1341 visitor.visit();
1342
1343 quint32 cbvSrvUavCount = 0;
1344 for (int s = 0; s < 6; ++s) {
1345 // CBs use root constant buffer views, no need to count them here
1346 cbvSrvUavCount += visitorData.srvs[s].count();
1347 cbvSrvUavCount += visitorData.uavs[s].count();
1348 }
1349
 // Nothing is bound when the descriptor heap capacity cannot be ensured.
1350 bool gotNewHeap = false;
1351 if (!ensureShaderVisibleDescriptorHeapCapacity(&shaderVisibleCbvSrvUavHeap,
1352 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
1353 currentFrameSlot,
1354 cbvSrvUavCount,
1355 &gotNewHeap))
1356 {
1357 return;
1358 }
1359 if (gotNewHeap) {
1360 qCDebug(QRHI_LOG_INFO, "Created new shader-visible CBV/SRV/UAV descriptor heap,"
1361 " per-frame slice size is now %u,"
1362 " if this happens frequently then that's not great.",
1363 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[0].capacity);
1364 bindShaderVisibleHeaps(cbD);
1365 }
1366
 // Uniform buffers: one root constant buffer view per buffer. The root
 // parameter index advances even when the resource lookup fails.
1367 int rootParamIndex = 0;
1368 for (int s = 0; s < 6; ++s) {
1369 if (!visitorData.cbufs[s].isEmpty()) {
1370 for (int i = 0, count = visitorData.cbufs[s].count(); i < count; ++i) {
1371 const auto &cbuf(visitorData.cbufs[s][i]);
1372 if (QD3D12Resource *res = resourcePool.lookupRef(cbuf.first)) {
1373 quint32 offset = cbuf.second;
1374 D3D12_GPU_VIRTUAL_ADDRESS gpuAddr = res->resource->GetGPUVirtualAddress() + offset;
1375 if (cbD->currentGraphicsPipeline)
1376 cbD->cmdList->SetGraphicsRootConstantBufferView(rootParamIndex, gpuAddr);
1377 else
1378 cbD->cmdList->SetComputeRootConstantBufferView(rootParamIndex, gpuAddr);
1379 }
1380 rootParamIndex += 1;
1381 }
1382 }
1383 }
 // SRVs: one descriptor table per stage that has any; the descriptors
 // are copied into the shader-visible heap slice of the current frame slot.
1384 for (int s = 0; s < 6; ++s) {
1385 if (!visitorData.srvs[s].isEmpty()) {
1386 QD3D12DescriptorHeap &gpuSrvHeap(shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot]);
1387 QD3D12Descriptor startDesc = gpuSrvHeap.get(visitorData.srvs[s].count());
1388 for (int i = 0, count = visitorData.srvs[s].count(); i < count; ++i) {
1389 const auto &srv(visitorData.srvs[s][i]);
1390 dev->CopyDescriptorsSimple(1, gpuSrvHeap.incremented(startDesc, i).cpuHandle, srv.cpuHandle,
1391 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
1392 }
1393
1394 if (cbD->currentGraphicsPipeline)
1395 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1396 else if (cbD->currentComputePipeline)
1397 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1398
1399 rootParamIndex += 1;
1400 }
1401 }
1402 for (int s = 0; s < 6; ++s) {
1403 // Samplers are one parameter / descriptor table each, and the
1404 // descriptor is from the shader visible sampler heap already.
1405 for (const QD3D12Descriptor &samplerDescriptor : visitorData.samplers[s]) {
1406 if (cbD->currentGraphicsPipeline)
1407 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, samplerDescriptor.gpuHandle);
1408 else if (cbD->currentComputePipeline)
1409 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, samplerDescriptor.gpuHandle);
1410
1411 rootParamIndex += 1;
1412 }
1413 }
 // UAVs: one descriptor table per stage that has any; the views are
 // created directly in the shader-visible heap slice. A failed resource
 // lookup still writes a descriptor, created with null arguments.
1414 for (int s = 0; s < 6; ++s) {
1415 if (!visitorData.uavs[s].isEmpty()) {
1416 QD3D12DescriptorHeap &gpuUavHeap(shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot]);
1417 QD3D12Descriptor startDesc = gpuUavHeap.get(visitorData.uavs[s].count());
1418 for (int i = 0, count = visitorData.uavs[s].count(); i < count; ++i) {
1419 const auto &uav(visitorData.uavs[s][i]);
1420 if (QD3D12Resource *res = resourcePool.lookupRef(uav.first)) {
1421 dev->CreateUnorderedAccessView(res->resource, nullptr, &uav.second,
1422 gpuUavHeap.incremented(startDesc, i).cpuHandle);
1423 } else {
1424 dev->CreateUnorderedAccessView(nullptr, nullptr, nullptr,
1425 gpuUavHeap.incremented(startDesc, i).cpuHandle);
1426 }
1427 }
1428
1429 if (cbD->currentGraphicsPipeline)
1430 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1431 else if (cbD->currentComputePipeline)
1432 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1433
1434 rootParamIndex += 1;
1435 }
1436 }
1437
 // Remember what is bound now; these values feed the srbChanged and
 // srbRebuilt checks on the next call.
1438 if (gfxPsD) {
1439 cbD->currentGraphicsSrb = srb;
1440 cbD->currentComputeSrb = nullptr;
1441 } else {
1442 cbD->currentGraphicsSrb = nullptr;
1443 cbD->currentComputeSrb = srb;
1444 }
1445 cbD->currentSrbGeneration = srbD->generation;
1446 }
1447}
1448
1449void QRhiD3D12::setVertexInput(QRhiCommandBuffer *cb,
1450 int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings,
1451 QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat)
1452{
1453 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1454 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1455
1456 bool needsBindVBuf = false;
1457 for (int i = 0; i < bindingCount; ++i) {
1458 const int inputSlot = startBinding + i;
1459 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, bindings[i].first);
1460 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::VertexBuffer));
1461 const bool isDynamic = bufD->m_type == QRhiBuffer::Dynamic;
1462 if (isDynamic)
1463 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
1464
1465 if (cbD->currentVertexBuffers[inputSlot] != bufD->handles[isDynamic ? currentFrameSlot : 0]
1466 || cbD->currentVertexOffsets[inputSlot] != bindings[i].second)
1467 {
1468 needsBindVBuf = true;
1469 cbD->currentVertexBuffers[inputSlot] = bufD->handles[isDynamic ? currentFrameSlot : 0];
1470 cbD->currentVertexOffsets[inputSlot] = bindings[i].second;
1471 }
1472 }
1473
1474 if (needsBindVBuf) {
1475 QVarLengthArray<D3D12_VERTEX_BUFFER_VIEW, 4> vbv;
1476 vbv.reserve(bindingCount);
1477
1478 QD3D12GraphicsPipeline *psD = cbD->currentGraphicsPipeline;
1479 const QRhiVertexInputLayout &inputLayout(psD->m_vertexInputLayout);
1480 const int inputBindingCount = inputLayout.cendBindings() - inputLayout.cbeginBindings();
1481
1482 for (int i = 0, ie = qMin(bindingCount, inputBindingCount); i != ie; ++i) {
1483 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, bindings[i].first);
1484 const QD3D12ObjectHandle handle = bufD->handles[bufD->m_type == QRhiBuffer::Dynamic ? currentFrameSlot : 0];
1485 const quint32 offset = bindings[i].second;
1486 const quint32 stride = inputLayout.bindingAt(i)->stride();
1487
1488 if (bufD->m_type != QRhiBuffer::Dynamic) {
1489 barrierGen.addTransitionBarrier(handle, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
1490 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1491 }
1492
1493 if (QD3D12Resource *res = resourcePool.lookupRef(handle)) {
1494 vbv.append({
1495 res->resource->GetGPUVirtualAddress() + offset,
1496 UINT(res->desc.Width - offset),
1497 stride
1498 });
1499 }
1500 }
1501
1502 cbD->cmdList->IASetVertexBuffers(UINT(startBinding), vbv.count(), vbv.constData());
1503 }
1504
1505 if (indexBuf) {
1506 QD3D12Buffer *ibufD = QRHI_RES(QD3D12Buffer, indexBuf);
1507 Q_ASSERT(ibufD->m_usage.testFlag(QRhiBuffer::IndexBuffer));
1508 const bool isDynamic = ibufD->m_type == QRhiBuffer::Dynamic;
1509 if (isDynamic)
1510 ibufD->executeHostWritesForFrameSlot(currentFrameSlot);
1511
1512 const DXGI_FORMAT dxgiFormat = indexFormat == QRhiCommandBuffer::IndexUInt16 ? DXGI_FORMAT_R16_UINT
1513 : DXGI_FORMAT_R32_UINT;
1514 if (cbD->currentIndexBuffer != ibufD->handles[isDynamic ? currentFrameSlot : 0]
1515 || cbD->currentIndexOffset != indexOffset
1516 || cbD->currentIndexFormat != dxgiFormat)
1517 {
1518 cbD->currentIndexBuffer = ibufD->handles[isDynamic ? currentFrameSlot : 0];
1519 cbD->currentIndexOffset = indexOffset;
1520 cbD->currentIndexFormat = dxgiFormat;
1521
1522 if (ibufD->m_type != QRhiBuffer::Dynamic) {
1523 barrierGen.addTransitionBarrier(cbD->currentIndexBuffer, D3D12_RESOURCE_STATE_INDEX_BUFFER);
1524 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1525 }
1526
1527 if (QD3D12Resource *res = resourcePool.lookupRef(cbD->currentIndexBuffer)) {
1528 const D3D12_INDEX_BUFFER_VIEW ibv = {
1529 res->resource->GetGPUVirtualAddress() + indexOffset,
1530 UINT(res->desc.Width - indexOffset),
1531 dxgiFormat
1532 };
1533 cbD->cmdList->IASetIndexBuffer(&ibv);
1534 }
1535 }
1536 }
1537}
1538
1539void QRhiD3D12::setDefaultScissor(QD3D12CommandBuffer *cbD)
1540{
1541 cbD->hasCustomScissorSet = false;
1542
1543 const QSize outputSize = cbD->currentTarget->pixelSize();
1544 std::array<float, 4> vp = cbD->currentViewport.viewport();
1545 float x = 0, y = 0, w = 0, h = 0;
1546
1547 if (qFuzzyIsNull(vp[2]) && qFuzzyIsNull(vp[3])) {
1548 x = 0;
1549 y = 0;
1550 w = outputSize.width();
1551 h = outputSize.height();
1552 } else {
1553 // x,y is top-left in D3D12_RECT but bottom-left in QRhiScissor
1554 qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, vp, &x, &y, &w, &h);
1555 }
1556
1557 D3D12_RECT r;
1558 r.left = x;
1559 r.top = y;
1560 // right and bottom are exclusive
1561 r.right = x + w;
1562 r.bottom = y + h;
1563 cbD->cmdList->RSSetScissorRects(1, &r);
1564}
1565
1566void QRhiD3D12::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
1567{
1568 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1569 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1570 Q_ASSERT(cbD->currentTarget);
1571 const QSize outputSize = cbD->currentTarget->pixelSize();
1572
1573 // D3D expects top-left, QRhiViewport is bottom-left
1574 float x, y, w, h;
1575 if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h))
1576 return;
1577
1578 D3D12_VIEWPORT v;
1579 v.TopLeftX = x;
1580 v.TopLeftY = y;
1581 v.Width = w;
1582 v.Height = h;
1583 v.MinDepth = viewport.minDepth();
1584 v.MaxDepth = viewport.maxDepth();
1585 cbD->cmdList->RSSetViewports(1, &v);
1586
1587 cbD->currentViewport = viewport;
1588 if (cbD->currentGraphicsPipeline
1589 && !cbD->currentGraphicsPipeline->flags().testFlag(QRhiGraphicsPipeline::UsesScissor))
1590 {
1591 setDefaultScissor(cbD);
1592 }
1593}
1594
1595void QRhiD3D12::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor)
1596{
1597 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1598 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1599 Q_ASSERT(cbD->currentTarget);
1600 const QSize outputSize = cbD->currentTarget->pixelSize();
1601
1602 // D3D expects top-left, QRhiScissor is bottom-left
1603 int x, y, w, h;
1604 if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h))
1605 return;
1606
1607 D3D12_RECT r;
1608 r.left = x;
1609 r.top = y;
1610 // right and bottom are exclusive
1611 r.right = x + w;
1612 r.bottom = y + h;
1613 cbD->cmdList->RSSetScissorRects(1, &r);
1614
1615 cbD->hasCustomScissorSet = true;
1616}
1617
1618void QRhiD3D12::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c)
1619{
1620 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1621 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1622 float v[4] = { c.redF(), c.greenF(), c.blueF(), c.alphaF() };
1623 cbD->cmdList->OMSetBlendFactor(v);
1624}
1625
1626void QRhiD3D12::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue)
1627{
1628 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1629 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1630 cbD->cmdList->OMSetStencilRef(refValue);
1631}
1632
1633static inline D3D12_SHADING_RATE toD3DShadingRate(const QSize &coarsePixelSize)
1634{
1635 if (coarsePixelSize == QSize(1, 2))
1636 return D3D12_SHADING_RATE_1X2;
1637 if (coarsePixelSize == QSize(2, 1))
1638 return D3D12_SHADING_RATE_2X1;
1639 if (coarsePixelSize == QSize(2, 2))
1640 return D3D12_SHADING_RATE_2X2;
1641 if (coarsePixelSize == QSize(2, 4))
1642 return D3D12_SHADING_RATE_2X4;
1643 if (coarsePixelSize == QSize(4, 2))
1644 return D3D12_SHADING_RATE_4X2;
1645 if (coarsePixelSize == QSize(4, 4))
1646 return D3D12_SHADING_RATE_4X4;
1647 return D3D12_SHADING_RATE_1X1;
1648}
1649
1650void QRhiD3D12::setShadingRate(QRhiCommandBuffer *cb, const QSize &coarsePixelSize)
1651{
1652 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1653 cbD->hasShadingRateSet = false;
1654
1655#ifdef QRHI_D3D12_CL5_AVAILABLE
1656 if (!caps.vrs)
1657 return;
1658
1659 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1660 const D3D12_SHADING_RATE_COMBINER combiners[] = { D3D12_SHADING_RATE_COMBINER_MAX, D3D12_SHADING_RATE_COMBINER_MAX };
1661 cbD->cmdList->RSSetShadingRate(toD3DShadingRate(coarsePixelSize), combiners);
1662 if (coarsePixelSize.width() != 1 || coarsePixelSize.height() != 1)
1663 cbD->hasShadingRateSet = true;
1664#else
1665 Q_UNUSED(cb);
1666 Q_UNUSED(coarsePixelSize);
1667 qWarning("Attempted to set ShadingRate without building Qt against a sufficiently new Windows SDK and d3d12.h. This cannot work.");
1668#endif
1669}
1670
1671void QRhiD3D12::draw(QRhiCommandBuffer *cb, quint32 vertexCount,
1672 quint32 instanceCount, quint32 firstVertex, quint32 firstInstance)
1673{
1674 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1675 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1676 cbD->cmdList->DrawInstanced(vertexCount, instanceCount, firstVertex, firstInstance);
1677}
1678
1679void QRhiD3D12::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount,
1680 quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance)
1681{
1682 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1683 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1684 cbD->cmdList->DrawIndexedInstanced(indexCount, instanceCount,
1685 firstIndex, vertexOffset,
1686 firstInstance);
1687}
1688
1689void QRhiD3D12::drawIndirect(QRhiCommandBuffer *cb, QRhiBuffer *indirectBuffer,
1690 quint32 indirectBufferOffset, quint32 drawCount, quint32 stride)
1691{
1692 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1693 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1694
1695 QD3D12Buffer *indirectBufferD = QRHI_RES(QD3D12Buffer, indirectBuffer);
1696 const bool isDynamic = indirectBufferD->m_type == QRhiBuffer::Dynamic;
1697 const QD3D12ObjectHandle indirectBufferHandle = indirectBufferD->handles[isDynamic ? currentFrameSlot : 0];
1698 if (isDynamic) {
1699 indirectBufferD->executeHostWritesForFrameSlot(currentFrameSlot);
1700 } else {
1701 barrierGen.addTransitionBarrier(indirectBufferHandle, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
1702 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1703 }
1704 QD3D12Resource *indirectRes = resourcePool.lookupRef(indirectBufferHandle);
1705 if (!indirectRes)
1706 return;
1707 ID3D12Resource *indirectBufferRes = indirectRes->resource;
1708
1709 const bool canUseMulti = (stride == sizeof(QRhiIndirectDrawCommand) && drawCommandSignature);
1710
1711 if (canUseMulti && drawCount > 1) {
1712 cbD->cmdList->ExecuteIndirect(drawCommandSignature, drawCount,
1713 indirectBufferRes, indirectBufferOffset,
1714 nullptr, 0);
1715 } else {
1716 UINT offset = indirectBufferOffset;
1717 for (quint32 i = 0; i < drawCount; ++i) {
1718 cbD->cmdList->ExecuteIndirect(drawCommandSignature, 1,
1719 indirectBufferRes, offset,
1720 nullptr, 0);
1721 offset += stride;
1722 }
1723 }
1724}
1725
1726void QRhiD3D12::drawIndexedIndirect(QRhiCommandBuffer *cb, QRhiBuffer *indirectBuffer,
1727 quint32 indirectBufferOffset, quint32 drawCount, quint32 stride)
1728{
1729 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1730 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1731
1732 QD3D12Buffer *indirectBufferD = QRHI_RES(QD3D12Buffer, indirectBuffer);
1733 const bool isDynamic = indirectBufferD->m_type == QRhiBuffer::Dynamic;
1734 const QD3D12ObjectHandle indirectBufferHandle = indirectBufferD->handles[isDynamic ? currentFrameSlot : 0];
1735 if (isDynamic) {
1736 indirectBufferD->executeHostWritesForFrameSlot(currentFrameSlot);
1737 } else {
1738 barrierGen.addTransitionBarrier(indirectBufferHandle, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
1739 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1740 }
1741 QD3D12Resource *indirectRes = resourcePool.lookupRef(indirectBufferHandle);
1742 if (!indirectRes)
1743 return;
1744 ID3D12Resource *indirectBufferRes = indirectRes->resource;
1745
1746 const bool canUseMulti = (stride == sizeof(QRhiIndexedIndirectDrawCommand) && drawIndexedCommandSignature);
1747
1748 if (canUseMulti && drawCount > 1) {
1749 cbD->cmdList->ExecuteIndirect(drawIndexedCommandSignature, drawCount,
1750 indirectBufferRes, indirectBufferOffset,
1751 nullptr, 0);
1752 } else {
1753 UINT offset = indirectBufferOffset;
1754 for (quint32 i = 0; i < drawCount; ++i) {
1755 cbD->cmdList->ExecuteIndirect(drawIndexedCommandSignature, 1,
1756 indirectBufferRes, offset,
1757 nullptr, 0);
1758 offset += stride;
1759 }
1760 }
1761}
1762
1763void QRhiD3D12::debugMarkBegin(QRhiCommandBuffer *cb, const QByteArray &name)
1764{
1765 if (!debugMarkers)
1766 return;
1767
1768 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1769#ifdef QRHI_D3D12_HAS_OLD_PIX
1770 PIXBeginEvent(cbD->cmdList, PIX_COLOR_DEFAULT, reinterpret_cast<LPCWSTR>(QString::fromLatin1(name).utf16()));
1771#else
1772 Q_UNUSED(cbD);
1773 Q_UNUSED(name);
1774#endif
1775}
1776
1777void QRhiD3D12::debugMarkEnd(QRhiCommandBuffer *cb)
1778{
1779 if (!debugMarkers)
1780 return;
1781
1782 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1783#ifdef QRHI_D3D12_HAS_OLD_PIX
1784 PIXEndEvent(cbD->cmdList);
1785#else
1786 Q_UNUSED(cbD);
1787#endif
1788}
1789
1790void QRhiD3D12::debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg)
1791{
1792 if (!debugMarkers)
1793 return;
1794
1795 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1796#ifdef QRHI_D3D12_HAS_OLD_PIX
1797 PIXSetMarker(cbD->cmdList, PIX_COLOR_DEFAULT, reinterpret_cast<LPCWSTR>(QString::fromLatin1(msg).utf16()));
1798#else
1799 Q_UNUSED(cbD);
1800 Q_UNUSED(msg);
1801#endif
1802}
1803
1804const QRhiNativeHandles *QRhiD3D12::nativeHandles(QRhiCommandBuffer *cb)
1805{
1806 return QRHI_RES(QD3D12CommandBuffer, cb)->nativeHandles();
1807}
1808
1809void QRhiD3D12::beginExternal(QRhiCommandBuffer *cb)
1810{
1811 Q_UNUSED(cb);
1812}
1813
1814void QRhiD3D12::endExternal(QRhiCommandBuffer *cb)
1815{
1816 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1817 cbD->resetPerPassState();
1818 bindShaderVisibleHeaps(cbD);
1819 if (cbD->currentTarget) { // could be compute, no rendertarget then
1820 QD3D12RenderTargetData *rtD = rtData(cbD->currentTarget);
1821 cbD->cmdList->OMSetRenderTargets(UINT(rtD->colorAttCount),
1822 rtD->rtv,
1823 TRUE,
1824 rtD->dsAttCount ? &rtD->dsv : nullptr);
1825 }
1826}
1827
1828double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb)
1829{
1830 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1831 return cbD->lastGpuTime;
1832}
1833
1834static void calculateGpuTime(QD3D12CommandBuffer *cbD,
1835 int timestampPairStartIndex,
1836 const quint8 *readbackBufPtr,
1837 quint64 timestampTicksPerSecond)
1838{
1839 const size_t byteOffset = timestampPairStartIndex * sizeof(quint64);
1840 const quint64 *p = reinterpret_cast<const quint64 *>(readbackBufPtr + byteOffset);
1841 const quint64 startTime = *p++;
1842 const quint64 endTime = *p;
1843 if (startTime < endTime) {
1844 const quint64 ticks = endTime - startTime;
1845 const double timeSec = ticks / double(timestampTicksPerSecond);
1846 cbD->lastGpuTime = timeSec;
1847 }
1848}
1849
1850QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
1851{
1852 Q_UNUSED(flags);
1853
1854 QD3D12SwapChain *swapChainD = QRHI_RES(QD3D12SwapChain, swapChain);
1855 currentSwapChain = swapChainD;
1856 currentFrameSlot = swapChainD->currentFrameSlot;
1857 QD3D12SwapChain::FrameResources &fr(swapChainD->frameRes[currentFrameSlot]);
1858
1859 // We could do smarter things but mirror the Vulkan backend for now: Make
1860 // sure the previous commands for this same frame slot have finished. Do
1861 // this also for any other swapchain's commands with the same frame slot.
1862 // While this reduces concurrency in render-to-swapchain-A,
1863 // render-to-swapchain-B, repeat kind of scenarios, it keeps resource usage
1864 // safe: swapchain A starting its frame 0, followed by swapchain B starting
1865 // its own frame 0 will make B wait for A's frame 0 commands. If a resource
1866 // is written in B's frame or when B checks for pending resource releases,
1867 // that won't mess up A's in-flight commands (as they are guaranteed not to
1868 // be in flight anymore). With Qt Quick this situation cannot happen anyway
1869 // by design (one QRhi per window).
1870 for (QD3D12SwapChain *sc : std::as_const(swapchains))
1871 sc->waitCommandCompletionForFrameSlot(currentFrameSlot); // note: swapChainD->currentFrameSlot, not sc's
1872
1873 if (swapChainD->frameLatencyWaitableObject) {
1874 // only wait when endFrame() called Present(), otherwise this would become a 1 sec timeout
1875 if (swapChainD->lastFrameLatencyWaitSlot != currentFrameSlot) {
1876 WaitForSingleObjectEx(swapChainD->frameLatencyWaitableObject, 1000, true);
1877 swapChainD->lastFrameLatencyWaitSlot = currentFrameSlot;
1878 }
1879 }
1880
1881 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
1882 if (FAILED(hr)) {
1883 qWarning("Failed to reset command allocator: %s",
1884 qPrintable(QSystemError::windowsComString(hr)));
1885 return QRhi::FrameOpError;
1886 }
1887
1888 if (!startCommandListForCurrentFrameSlot(&fr.cmdList))
1889 return QRhi::FrameOpError;
1890
1891 QD3D12CommandBuffer *cbD = &swapChainD->cbWrapper;
1892 cbD->cmdList = fr.cmdList;
1893
1894 swapChainD->rtWrapper.d.rtv[0] = swapChainD->sampleDesc.Count > 1
1895 ? swapChainD->msaaRtvs[swapChainD->currentBackBufferIndex].cpuHandle
1896 : swapChainD->rtvs[swapChainD->currentBackBufferIndex].cpuHandle;
1897
1898 swapChainD->rtWrapper.d.dsv = swapChainD->ds ? swapChainD->ds->dsv.cpuHandle
1899 : D3D12_CPU_DESCRIPTOR_HANDLE { 0 };
1900
1901 if (swapChainD->stereo) {
1902 swapChainD->rtWrapperRight.d.rtv[0] = swapChainD->sampleDesc.Count > 1
1903 ? swapChainD->msaaRtvs[swapChainD->currentBackBufferIndex].cpuHandle
1904 : swapChainD->rtvsRight[swapChainD->currentBackBufferIndex].cpuHandle;
1905
1906 swapChainD->rtWrapperRight.d.dsv =
1907 swapChainD->ds ? swapChainD->ds->dsv.cpuHandle : D3D12_CPU_DESCRIPTOR_HANDLE{ 0 };
1908 }
1909
1910
1911 // Time to release things that are marked for currentFrameSlot since due to
1912 // the wait above we know that the previous commands on the GPU for this
1913 // slot must have finished already.
1914 releaseQueue.executeDeferredReleases(currentFrameSlot);
1915
1916 // Full reset of the command buffer data.
1917 cbD->resetState();
1918
1919 // Move the head back to zero for the per-frame shader-visible descriptor heap work areas.
1920 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
1921 // Same for the small staging area.
1922 smallStagingAreas[currentFrameSlot].head = 0;
1923
1924 bindShaderVisibleHeaps(cbD);
1925
1926 finishActiveReadbacks(); // last, in case the readback-completed callback issues rhi calls
1927
1928 if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
1929 // Read the timestamps for the previous frame for this slot. (the
1930 // ResolveQuery() should have completed by now due to the wait above)
1931 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
1932 calculateGpuTime(cbD,
1933 timestampPairStartIndex,
1934 timestampReadbackArea.mem.p,
1935 timestampTicksPerSecond);
1936 // Write the start timestamp for this frame for this slot.
1937 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
1938 D3D12_QUERY_TYPE_TIMESTAMP,
1939 timestampPairStartIndex);
1940 }
1941
1942 QDxgiVSyncService::instance()->beginFrame(adapterLuid);
1943
1944 return QRhi::FrameOpSuccess;
1945}
1946
1947QRhi::FrameOpResult QRhiD3D12::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags)
1948{
1949 QD3D12SwapChain *swapChainD = QRHI_RES(QD3D12SwapChain, swapChain);
1950 Q_ASSERT(currentSwapChain == swapChainD);
1951 QD3D12CommandBuffer *cbD = &swapChainD->cbWrapper;
1952
1953 QD3D12ObjectHandle backBufferResourceHandle = swapChainD->colorBuffers[swapChainD->currentBackBufferIndex];
1954 if (swapChainD->sampleDesc.Count > 1) {
1955 QD3D12ObjectHandle msaaBackBufferResourceHandle = swapChainD->msaaBuffers[swapChainD->currentBackBufferIndex];
1956 barrierGen.addTransitionBarrier(msaaBackBufferResourceHandle, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
1957 barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_RESOLVE_DEST);
1958 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1959 const QD3D12Resource *src = resourcePool.lookupRef(msaaBackBufferResourceHandle);
1960 const QD3D12Resource *dst = resourcePool.lookupRef(backBufferResourceHandle);
1961 if (src && dst)
1962 cbD->cmdList->ResolveSubresource(dst->resource, 0, src->resource, 0, swapChainD->colorFormat);
1963 }
1964
1965 barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_PRESENT);
1966 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1967
1968 if (timestampQueryHeap.isValid()) {
1969 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
1970 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
1971 D3D12_QUERY_TYPE_TIMESTAMP,
1972 timestampPairStartIndex + 1);
1973 cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
1974 D3D12_QUERY_TYPE_TIMESTAMP,
1975 timestampPairStartIndex,
1976 2,
1977 timestampReadbackArea.mem.buffer,
1978 timestampPairStartIndex * sizeof(quint64));
1979 }
1980
1981 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
1982 HRESULT hr = cmdList->Close();
1983 if (FAILED(hr)) {
1984 qWarning("Failed to close command list: %s",
1985 qPrintable(QSystemError::windowsComString(hr)));
1986 return QRhi::FrameOpError;
1987 }
1988
1989 ID3D12CommandList *execList[] = { cmdList };
1990 cmdQueue->ExecuteCommandLists(1, execList);
1991
1992 if (!flags.testFlag(QRhi::SkipPresent)) {
1993 UINT presentFlags = 0;
1994 if (swapChainD->swapInterval == 0
1995 && (swapChainD->swapChainFlags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING))
1996 {
1997 presentFlags |= DXGI_PRESENT_ALLOW_TEARING;
1998 }
1999 if (!swapChainD->swapChain) {
2000 qWarning("Failed to present, no swapchain");
2001 return QRhi::FrameOpError;
2002 }
2003 HRESULT hr = swapChainD->swapChain->Present(swapChainD->swapInterval, presentFlags);
2004 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
2005 qWarning("Device loss detected in Present()");
2006 deviceLost = true;
2007 return QRhi::FrameOpDeviceLost;
2008 } else if (FAILED(hr)) {
2009 qWarning("Failed to present: %s", qPrintable(QSystemError::windowsComString(hr)));
2010 return QRhi::FrameOpError;
2011 }
2012
2013 if (dcompDevice && swapChainD->dcompTarget && swapChainD->dcompVisual)
2014 dcompDevice->Commit();
2015 }
2016
2017 swapChainD->addCommandCompletionSignalForCurrentFrameSlot();
2018
2019 // NB! The deferred-release mechanism here differs from the older QRhi
2020 // backends. There is no lastActiveFrameSlot tracking. Instead,
2021 // currentFrameSlot is written to the registered entries now, and so the
2022 // resources will get released in the frames_in_flight'th beginFrame()
2023 // counting starting from now.
2024 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2025
2026 if (!flags.testFlag(QRhi::SkipPresent)) {
2027 // Only move to the next slot if we presented. Otherwise will block and
2028 // wait for completion in the next beginFrame already, but SkipPresent
2029 // should be infrequent anyway.
2030 swapChainD->currentFrameSlot = (swapChainD->currentFrameSlot + 1) % QD3D12_FRAMES_IN_FLIGHT;
2031 swapChainD->currentBackBufferIndex = swapChainD->swapChain->GetCurrentBackBufferIndex();
2032 }
2033
2034 currentSwapChain = nullptr;
2035 return QRhi::FrameOpSuccess;
2036}
2037
2038QRhi::FrameOpResult QRhiD3D12::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi::BeginFrameFlags flags)
2039{
2040 Q_UNUSED(flags);
2041
2042 // Switch to the next slot manually. Swapchains do not know about this
2043 // which is good. So for example an onscreen, onscreen, offscreen,
2044 // onscreen, onscreen, onscreen sequence of frames leads to 0, 1, 0, 0, 1,
2045 // 0. (no strict alternation anymore) But this is not different from what
2046 // happens when multiple swapchains are involved. Offscreen frames are
2047 // synchronous anyway in the sense that they wait for execution to complete
2048 // in endOffscreenFrame, so no resources used in that frame are busy
2049 // anymore in the next frame.
2050
2051 currentFrameSlot = (currentFrameSlot + 1) % QD3D12_FRAMES_IN_FLIGHT;
2052
2053 for (QD3D12SwapChain *sc : std::as_const(swapchains))
2054 sc->waitCommandCompletionForFrameSlot(currentFrameSlot); // note: not sc's currentFrameSlot
2055
2056 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
2057 if (FAILED(hr)) {
2058 qWarning("Failed to reset command allocator: %s",
2059 qPrintable(QSystemError::windowsComString(hr)));
2060 return QRhi::FrameOpError;
2061 }
2062
2063 if (!offscreenCb[currentFrameSlot])
2064 offscreenCb[currentFrameSlot] = new QD3D12CommandBuffer(this);
2065 QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot];
2066 if (!startCommandListForCurrentFrameSlot(&cbD->cmdList))
2067 return QRhi::FrameOpError;
2068
2069 releaseQueue.executeDeferredReleases(currentFrameSlot);
2070 cbD->resetState();
2071 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
2072 smallStagingAreas[currentFrameSlot].head = 0;
2073
2074 bindShaderVisibleHeaps(cbD);
2075
2076 if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
2077 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
2078 D3D12_QUERY_TYPE_TIMESTAMP,
2079 currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT);
2080 }
2081
2082 offscreenActive = true;
2083 *cb = cbD;
2084
2085 return QRhi::FrameOpSuccess;
2086}
2087
2088QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags)
2089{
2090 Q_UNUSED(flags);
2091 Q_ASSERT(offscreenActive);
2092 offscreenActive = false;
2093
2094 QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot];
2095 if (timestampQueryHeap.isValid()) {
2096 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
2097 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
2098 D3D12_QUERY_TYPE_TIMESTAMP,
2099 timestampPairStartIndex + 1);
2100 cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
2101 D3D12_QUERY_TYPE_TIMESTAMP,
2102 timestampPairStartIndex,
2103 2,
2104 timestampReadbackArea.mem.buffer,
2105 timestampPairStartIndex * sizeof(quint64));
2106 }
2107
2108 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
2109 HRESULT hr = cmdList->Close();
2110 if (FAILED(hr)) {
2111 qWarning("Failed to close command list: %s",
2112 qPrintable(QSystemError::windowsComString(hr)));
2113 return QRhi::FrameOpError;
2114 }
2115
2116 ID3D12CommandList *execList[] = { cmdList };
2117 cmdQueue->ExecuteCommandLists(1, execList);
2118
2119 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2120
2121 // wait for completion
2122 waitGpu();
2123
2124 // Here we know that executing the host-side reads for this (or any
2125 // previous) frame is safe since we waited for completion above.
2126 finishActiveReadbacks(true);
2127
2128 // the timestamp query results should be available too, given the wait
2129 if (timestampQueryHeap.isValid()) {
2130 calculateGpuTime(cbD,
2131 currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT,
2132 timestampReadbackArea.mem.p,
2133 timestampTicksPerSecond);
2134 }
2135
2136 return QRhi::FrameOpSuccess;
2137}
2138
2139QRhi::FrameOpResult QRhiD3D12::finish()
2140{
2141 QD3D12CommandBuffer *cbD = nullptr;
2142 if (inFrame) {
2143 if (offscreenActive) {
2144 Q_ASSERT(!currentSwapChain);
2145 cbD = offscreenCb[currentFrameSlot];
2146 } else {
2147 Q_ASSERT(currentSwapChain);
2148 cbD = &currentSwapChain->cbWrapper;
2149 }
2150 if (!cbD)
2151 return QRhi::FrameOpError;
2152
2153 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2154
2155 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
2156 HRESULT hr = cmdList->Close();
2157 if (FAILED(hr)) {
2158 qWarning("Failed to close command list: %s",
2159 qPrintable(QSystemError::windowsComString(hr)));
2160 return QRhi::FrameOpError;
2161 }
2162
2163 ID3D12CommandList *execList[] = { cmdList };
2164 cmdQueue->ExecuteCommandLists(1, execList);
2165
2166 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2167 }
2168
2169 // full blocking wait for everything, frame slots do not matter now
2170 waitGpu();
2171
2172 if (inFrame) {
2173 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
2174 if (FAILED(hr)) {
2175 qWarning("Failed to reset command allocator: %s",
2176 qPrintable(QSystemError::windowsComString(hr)));
2177 return QRhi::FrameOpError;
2178 }
2179
2180 if (!startCommandListForCurrentFrameSlot(&cbD->cmdList))
2181 return QRhi::FrameOpError;
2182
2183 cbD->resetState();
2184
2185 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
2186 smallStagingAreas[currentFrameSlot].head = 0;
2187
2188 bindShaderVisibleHeaps(cbD);
2189 }
2190
2191 releaseQueue.releaseAll();
2192 finishActiveReadbacks(true);
2193
2194 return QRhi::FrameOpSuccess;
2195}
2196
2197void QRhiD3D12::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2198{
2199 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2200 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2201 enqueueResourceUpdates(cbD, resourceUpdates);
2202}
2203
2204void QRhiD3D12::beginPass(QRhiCommandBuffer *cb,
2205 QRhiRenderTarget *rt,
2206 const QColor &colorClearValue,
2207 const QRhiDepthStencilClearValue &depthStencilClearValue,
2208 QRhiResourceUpdateBatch *resourceUpdates,
2209 QRhiCommandBuffer::BeginPassFlags)
2210{
2211 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2212 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2213
2214 if (resourceUpdates)
2215 enqueueResourceUpdates(cbD, resourceUpdates);
2216
2217 QD3D12RenderTargetData *rtD = rtData(rt);
2218 bool wantsColorClear = true;
2219 bool wantsDsClear = true;
2220 if (rt->resourceType() == QRhiRenderTarget::TextureRenderTarget) {
2221 QD3D12TextureRenderTarget *rtTex = QRHI_RES(QD3D12TextureRenderTarget, rt);
2222 wantsColorClear = !rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents);
2223 wantsDsClear = !rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents);
2224 if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QD3D12Texture, QD3D12RenderBuffer>(rtTex->description(), rtD->currentResIdList))
2225 rtTex->create();
2226
2227 for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); it != itEnd; ++it) {
2228 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, it->texture());
2229 QD3D12Texture *resolveTexD = QRHI_RES(QD3D12Texture, it->resolveTexture());
2230 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, it->renderBuffer());
2231 if (texD)
2232 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2233 else if (rbD)
2234 barrierGen.addTransitionBarrier(rbD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2235 if (resolveTexD)
2236 barrierGen.addTransitionBarrier(resolveTexD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2237 }
2238 if (rtTex->m_desc.depthStencilBuffer()) {
2239 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, rtTex->m_desc.depthStencilBuffer());
2240 Q_ASSERT(rbD->m_type == QRhiRenderBuffer::DepthStencil);
2241 barrierGen.addTransitionBarrier(rbD->handle, D3D12_RESOURCE_STATE_DEPTH_WRITE);
2242 } else if (rtTex->m_desc.depthTexture()) {
2243 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, rtTex->m_desc.depthTexture());
2244 barrierGen.addTransitionBarrier(depthTexD->handle, D3D12_RESOURCE_STATE_DEPTH_WRITE);
2245 }
2246 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2247 } else {
2248 Q_ASSERT(currentSwapChain);
2249 barrierGen.addTransitionBarrier(currentSwapChain->sampleDesc.Count > 1
2250 ? currentSwapChain->msaaBuffers[currentSwapChain->currentBackBufferIndex]
2251 : currentSwapChain->colorBuffers[currentSwapChain->currentBackBufferIndex],
2252 D3D12_RESOURCE_STATE_RENDER_TARGET);
2253 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2254 }
2255
2256 cbD->cmdList->OMSetRenderTargets(UINT(rtD->colorAttCount),
2257 rtD->rtv,
2258 TRUE,
2259 rtD->dsAttCount ? &rtD->dsv : nullptr);
2260
2261 if (rtD->colorAttCount && wantsColorClear) {
2262 float clearColor[4] = {
2263 colorClearValue.redF(),
2264 colorClearValue.greenF(),
2265 colorClearValue.blueF(),
2266 colorClearValue.alphaF()
2267 };
2268 for (int i = 0; i < rtD->colorAttCount; ++i)
2269 cbD->cmdList->ClearRenderTargetView(rtD->rtv[i], clearColor, 0, nullptr);
2270 }
2271 if (rtD->dsAttCount && wantsDsClear) {
2272 cbD->cmdList->ClearDepthStencilView(rtD->dsv,
2273 D3D12_CLEAR_FLAGS(D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL),
2274 depthStencilClearValue.depthClearValue(),
2275 UINT8(depthStencilClearValue.stencilClearValue()),
2276 0,
2277 nullptr);
2278 }
2279
2280 cbD->recordingPass = QD3D12CommandBuffer::RenderPass;
2281 cbD->currentTarget = rt;
2282
2283 bool hasShadingRateMapSet = false;
2284#ifdef QRHI_D3D12_CL5_AVAILABLE
2285 if (rtD->rp->hasShadingRateMap) {
2286 cbD->setShadingRate(QSize(1, 1));
2287 QD3D12ShadingRateMap *rateMapD = rt->resourceType() == QRhiRenderTarget::TextureRenderTarget
2288 ? QRHI_RES(QD3D12ShadingRateMap, QRHI_RES(QD3D12TextureRenderTarget, rt)->m_desc.shadingRateMap())
2289 : QRHI_RES(QD3D12ShadingRateMap, QRHI_RES(QD3D12SwapChainRenderTarget, rt)->swapChain()->shadingRateMap());
2290 if (QD3D12Resource *res = resourcePool.lookupRef(rateMapD->handle)) {
2291 barrierGen.addTransitionBarrier(rateMapD->handle, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
2292 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2293 cbD->cmdList->RSSetShadingRateImage(res->resource);
2294 hasShadingRateMapSet = true;
2295 }
2296 } else if (cbD->hasShadingRateMapSet) {
2297 cbD->cmdList->RSSetShadingRateImage(nullptr);
2298 cbD->setShadingRate(QSize(1, 1));
2299 } else if (cbD->hasShadingRateSet) {
2300 cbD->setShadingRate(QSize(1, 1));
2301 }
2302#endif
2303
2304 cbD->resetPerPassState();
2305
2306 // shading rate tracking is reset in resetPerPassState(), sync what we did just above
2307 cbD->hasShadingRateMapSet = hasShadingRateMapSet;
2308}
2309
2310void QRhiD3D12::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2311{
2312 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2313 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
2314
2315 if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) {
2316 QD3D12TextureRenderTarget *rtTex = QRHI_RES(QD3D12TextureRenderTarget, cbD->currentTarget);
2317 for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments();
2318 it != itEnd; ++it)
2319 {
2320 const QRhiColorAttachment &colorAtt(*it);
2321 if (!colorAtt.resolveTexture())
2322 continue;
2323
2324 QD3D12Texture *dstTexD = QRHI_RES(QD3D12Texture, colorAtt.resolveTexture());
2325 QD3D12Resource *dstRes = resourcePool.lookupRef(dstTexD->handle);
2326 if (!dstRes)
2327 continue;
2328
2329 QD3D12Texture *srcTexD = QRHI_RES(QD3D12Texture, colorAtt.texture());
2330 QD3D12RenderBuffer *srcRbD = QRHI_RES(QD3D12RenderBuffer, colorAtt.renderBuffer());
2331 Q_ASSERT(srcTexD || srcRbD);
2332 QD3D12Resource *srcRes = resourcePool.lookupRef(srcTexD ? srcTexD->handle : srcRbD->handle);
2333 if (!srcRes)
2334 continue;
2335
2336 if (srcTexD) {
2337 if (srcTexD->dxgiFormat != dstTexD->dxgiFormat) {
2338 qWarning("Resolve source (%d) and destination (%d) formats do not match",
2339 int(srcTexD->dxgiFormat), int(dstTexD->dxgiFormat));
2340 continue;
2341 }
2342 if (srcTexD->sampleDesc.Count <= 1) {
2343 qWarning("Cannot resolve a non-multisample texture");
2344 continue;
2345 }
2346 if (srcTexD->m_pixelSize != dstTexD->m_pixelSize) {
2347 qWarning("Resolve source and destination sizes do not match");
2348 continue;
2349 }
2350 } else {
2351 if (srcRbD->dxgiFormat != dstTexD->dxgiFormat) {
2352 qWarning("Resolve source (%d) and destination (%d) formats do not match",
2353 int(srcRbD->dxgiFormat), int(dstTexD->dxgiFormat));
2354 continue;
2355 }
2356 if (srcRbD->m_pixelSize != dstTexD->m_pixelSize) {
2357 qWarning("Resolve source and destination sizes do not match");
2358 continue;
2359 }
2360 }
2361
2362 barrierGen.addTransitionBarrier(srcTexD ? srcTexD->handle : srcRbD->handle, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
2363 barrierGen.addTransitionBarrier(dstTexD->handle, D3D12_RESOURCE_STATE_RESOLVE_DEST);
2364 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2365
2366 const UINT resolveCount = colorAtt.multiViewCount() >= 2 ? colorAtt.multiViewCount() : 1;
2367 for (UINT resolveIdx = 0; resolveIdx < resolveCount; ++resolveIdx) {
2368 const UINT srcSubresource = calcSubresource(0, UINT(colorAtt.layer()) + resolveIdx, 1);
2369 const UINT dstSubresource = calcSubresource(UINT(colorAtt.resolveLevel()),
2370 UINT(colorAtt.resolveLayer()) + resolveIdx,
2371 dstTexD->mipLevelCount);
2372 cbD->cmdList->ResolveSubresource(dstRes->resource, dstSubresource,
2373 srcRes->resource, srcSubresource,
2374 dstTexD->dxgiFormat);
2375 }
2376 }
2377 if (rtTex->m_desc.depthResolveTexture())
2378 qWarning("Resolving multisample depth-stencil buffers is not supported with D3D");
2379 }
2380
2381 cbD->recordingPass = QD3D12CommandBuffer::NoPass;
2382 cbD->currentTarget = nullptr;
2383
2384 if (resourceUpdates)
2385 enqueueResourceUpdates(cbD, resourceUpdates);
2386}
2387
2388void QRhiD3D12::beginComputePass(QRhiCommandBuffer *cb,
2389 QRhiResourceUpdateBatch *resourceUpdates,
2390 QRhiCommandBuffer::BeginPassFlags)
2391{
2392 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2393 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2394
2395 if (resourceUpdates)
2396 enqueueResourceUpdates(cbD, resourceUpdates);
2397
2398 cbD->recordingPass = QD3D12CommandBuffer::ComputePass;
2399
2400 cbD->resetPerPassState();
2401}
2402
2403void QRhiD3D12::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2404{
2405 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2406 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2407
2408 cbD->recordingPass = QD3D12CommandBuffer::NoPass;
2409
2410 if (resourceUpdates)
2411 enqueueResourceUpdates(cbD, resourceUpdates);
2412}
2413
2414void QRhiD3D12::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps)
2415{
2416 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2417 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2418 QD3D12ComputePipeline *psD = QRHI_RES(QD3D12ComputePipeline, ps);
2419 const bool pipelineChanged = cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation;
2420
2421 if (pipelineChanged) {
2422 cbD->currentGraphicsPipeline = nullptr;
2423 cbD->currentComputePipeline = psD;
2424 cbD->currentPipelineGeneration = psD->generation;
2425
2426 if (QD3D12Pipeline *pipeline = pipelinePool.lookupRef(psD->handle)) {
2427 Q_ASSERT(pipeline->type == QD3D12Pipeline::Compute);
2428 cbD->cmdList->SetPipelineState(pipeline->pso);
2429 if (QD3D12RootSignature *rs = rootSignaturePool.lookupRef(psD->rootSigHandle))
2430 cbD->cmdList->SetComputeRootSignature(rs->rootSig);
2431 }
2432 }
2433}
2434
2435void QRhiD3D12::dispatch(QRhiCommandBuffer *cb, int x, int y, int z)
2436{
2437 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2438 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2439 cbD->cmdList->Dispatch(UINT(x), UINT(y), UINT(z));
2440}
2441
2442bool QD3D12DescriptorHeap::create(ID3D12Device *device,
2443 quint32 descriptorCount,
2444 D3D12_DESCRIPTOR_HEAP_TYPE heapType,
2445 D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags)
2446{
2447 head = 0;
2448 capacity = descriptorCount;
2449 this->heapType = heapType;
2450 this->heapFlags = heapFlags;
2451
2452 D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
2453 heapDesc.Type = heapType;
2454 heapDesc.NumDescriptors = capacity;
2455 heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAGS(heapFlags);
2456
2457 HRESULT hr = device->CreateDescriptorHeap(&heapDesc, __uuidof(ID3D12DescriptorHeap), reinterpret_cast<void **>(&heap));
2458 if (FAILED(hr)) {
2459 qWarning("Failed to create descriptor heap: %s", qPrintable(QSystemError::windowsComString(hr)));
2460 heap = nullptr;
2461 capacity = descriptorByteSize = 0;
2462 return false;
2463 }
2464
2465 descriptorByteSize = device->GetDescriptorHandleIncrementSize(heapType);
2466 heapStart.cpuHandle = heap->GetCPUDescriptorHandleForHeapStart();
2467 if (heapFlags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
2468 heapStart.gpuHandle = heap->GetGPUDescriptorHandleForHeapStart();
2469
2470 return true;
2471}
2472
2473void QD3D12DescriptorHeap::createWithExisting(const QD3D12DescriptorHeap &other,
2474 quint32 offsetInDescriptors,
2475 quint32 descriptorCount)
2476{
2477 heap = nullptr;
2478 head = 0;
2479 capacity = descriptorCount;
2480 heapType = other.heapType;
2481 heapFlags = other.heapFlags;
2482 descriptorByteSize = other.descriptorByteSize;
2483 heapStart = incremented(other.heapStart, offsetInDescriptors);
2484}
2485
2486void QD3D12DescriptorHeap::destroy()
2487{
2488 if (heap) {
2489 heap->Release();
2490 heap = nullptr;
2491 }
2492 capacity = 0;
2493}
2494
2495void QD3D12DescriptorHeap::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2496{
2497 if (heap) {
2498 releaseQueue->deferredReleaseDescriptorHeap(heap);
2499 heap = nullptr;
2500 }
2501 capacity = 0;
2502}
2503
2504QD3D12Descriptor QD3D12DescriptorHeap::get(quint32 count)
2505{
2506 Q_ASSERT(count > 0);
2507 if (head + count > capacity) {
2508 qWarning("Cannot get %u descriptors as that would exceed capacity %u", count, capacity);
2509 return {};
2510 }
2511 head += count;
2512 return at(head - count);
2513}
2514
2515QD3D12Descriptor QD3D12DescriptorHeap::at(quint32 index) const
2516{
2517 const quint32 startOffset = index * descriptorByteSize;
2518 QD3D12Descriptor result;
2519 result.cpuHandle.ptr = heapStart.cpuHandle.ptr + startOffset;
2520 if (heapStart.gpuHandle.ptr != 0)
2521 result.gpuHandle.ptr = heapStart.gpuHandle.ptr + startOffset;
2522 return result;
2523}
2524
2525bool QD3D12CpuDescriptorPool::create(ID3D12Device *device, D3D12_DESCRIPTOR_HEAP_TYPE heapType, const char *debugName)
2526{
2527 QD3D12DescriptorHeap firstHeap;
2528 if (!firstHeap.create(device, DESCRIPTORS_PER_HEAP, heapType, D3D12_DESCRIPTOR_HEAP_FLAG_NONE))
2529 return false;
2530 heaps.append(HeapWithMap::init(firstHeap, DESCRIPTORS_PER_HEAP));
2531 descriptorByteSize = heaps[0].heap.descriptorByteSize;
2532 this->device = device;
2533 this->debugName = debugName;
2534 return true;
2535}
2536
2537void QD3D12CpuDescriptorPool::destroy()
2538{
2539#ifndef QT_NO_DEBUG
2540 // debug builds: just do it always
2541 static bool leakCheck = true;
2542#else
2543 // release builds: opt-in
2544 static bool leakCheck = qEnvironmentVariableIntValue("QT_RHI_LEAK_CHECK");
2545#endif
2546 if (leakCheck) {
2547 for (const HeapWithMap &heap : std::as_const(heaps)) {
2548 const int leakedDescriptorCount = heap.map.count(true);
2549 if (leakedDescriptorCount > 0) {
2550 qWarning("QD3D12CpuDescriptorPool::destroy(): "
2551 "Heap %p for descriptor pool %p '%s' has %d unreleased descriptors",
2552 &heap.heap, this, debugName, leakedDescriptorCount);
2553 }
2554 }
2555 }
2556 for (HeapWithMap &heap : heaps)
2557 heap.heap.destroy();
2558 heaps.clear();
2559}
2560
2561QD3D12Descriptor QD3D12CpuDescriptorPool::allocate(quint32 count)
2562{
2563 Q_ASSERT(count > 0 && count <= DESCRIPTORS_PER_HEAP);
2564
2565 HeapWithMap &last(heaps.last());
2566 if (last.heap.head + count <= last.heap.capacity) {
2567 quint32 firstIndex = last.heap.head;
2568 for (quint32 i = 0; i < count; ++i)
2569 last.map.setBit(firstIndex + i);
2570 return last.heap.get(count);
2571 }
2572
2573 for (HeapWithMap &heap : heaps) {
2574 quint32 freeCount = 0;
2575 for (quint32 i = 0; i < DESCRIPTORS_PER_HEAP; ++i) {
2576 if (heap.map.testBit(i)) {
2577 freeCount = 0;
2578 } else {
2579 freeCount += 1;
2580 if (freeCount == count) {
2581 quint32 firstIndex = i - (freeCount - 1);
2582 for (quint32 j = 0; j < count; ++j) {
2583 heap.map.setBit(firstIndex + j);
2584 return heap.heap.at(firstIndex);
2585 }
2586 }
2587 }
2588 }
2589 }
2590
2591 QD3D12DescriptorHeap newHeap;
2592 if (!newHeap.create(device, DESCRIPTORS_PER_HEAP, last.heap.heapType, last.heap.heapFlags))
2593 return {};
2594
2595 heaps.append(HeapWithMap::init(newHeap, DESCRIPTORS_PER_HEAP));
2596
2597 for (quint32 i = 0; i < count; ++i)
2598 heaps.last().map.setBit(i);
2599
2600 return heaps.last().heap.get(count);
2601}
2602
2603void QD3D12CpuDescriptorPool::release(const QD3D12Descriptor &descriptor, quint32 count)
2604{
2605 Q_ASSERT(count > 0 && count <= DESCRIPTORS_PER_HEAP);
2606 if (!descriptor.isValid())
2607 return;
2608
2609 const SIZE_T addr = descriptor.cpuHandle.ptr;
2610 for (HeapWithMap &heap : heaps) {
2611 const SIZE_T begin = heap.heap.heapStart.cpuHandle.ptr;
2612 const SIZE_T end = begin + heap.heap.descriptorByteSize * heap.heap.capacity;
2613 if (addr >= begin && addr < end) {
2614 quint32 firstIndex = (addr - begin) / heap.heap.descriptorByteSize;
2615 for (quint32 i = 0; i < count; ++i)
2616 heap.map.setBit(firstIndex + i, false);
2617 return;
2618 }
2619 }
2620
2621 qWarning("QD3D12CpuDescriptorPool::release: Descriptor with address %llu is not in any heap",
2622 quint64(descriptor.cpuHandle.ptr));
2623}
2624
2625bool QD3D12QueryHeap::create(ID3D12Device *device,
2626 quint32 queryCount,
2627 D3D12_QUERY_HEAP_TYPE heapType)
2628{
2629 capacity = queryCount;
2630
2631 D3D12_QUERY_HEAP_DESC heapDesc = {};
2632 heapDesc.Type = heapType;
2633 heapDesc.Count = capacity;
2634
2635 HRESULT hr = device->CreateQueryHeap(&heapDesc, __uuidof(ID3D12QueryHeap), reinterpret_cast<void **>(&heap));
2636 if (FAILED(hr)) {
2637 qWarning("Failed to create query heap: %s", qPrintable(QSystemError::windowsComString(hr)));
2638 heap = nullptr;
2639 capacity = 0;
2640 return false;
2641 }
2642
2643 return true;
2644}
2645
2646void QD3D12QueryHeap::destroy()
2647{
2648 if (heap) {
2649 heap->Release();
2650 heap = nullptr;
2651 }
2652 capacity = 0;
2653}
2654
2655bool QD3D12StagingArea::create(QRhiD3D12 *rhi, quint32 capacity, D3D12_HEAP_TYPE heapType)
2656{
2657 Q_ASSERT(heapType == D3D12_HEAP_TYPE_UPLOAD || heapType == D3D12_HEAP_TYPE_READBACK);
2658 D3D12_RESOURCE_DESC resourceDesc = {};
2659 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
2660 resourceDesc.Width = capacity;
2661 resourceDesc.Height = 1;
2662 resourceDesc.DepthOrArraySize = 1;
2663 resourceDesc.MipLevels = 1;
2664 resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
2665 resourceDesc.SampleDesc = { 1, 0 };
2666 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
2667 resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
2668 UINT state = heapType == D3D12_HEAP_TYPE_UPLOAD ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
2669 HRESULT hr = rhi->vma.createResource(heapType,
2670 &resourceDesc,
2671 D3D12_RESOURCE_STATES(state),
2672 nullptr,
2673 &allocation,
2674 __uuidof(ID3D12Resource),
2675 reinterpret_cast<void **>(&resource));
2676 if (FAILED(hr)) {
2677 qWarning("Failed to create buffer for staging area: %s",
2678 qPrintable(QSystemError::windowsComString(hr)));
2679 return false;
2680 }
2681 void *p = nullptr;
2682 hr = resource->Map(0, nullptr, &p);
2683 if (FAILED(hr)) {
2684 qWarning("Failed to map buffer for staging area: %s",
2685 qPrintable(QSystemError::windowsComString(hr)));
2686 destroy();
2687 return false;
2688 }
2689
2690 mem.p = static_cast<quint8 *>(p);
2691 mem.gpuAddr = resource->GetGPUVirtualAddress();
2692 mem.buffer = resource;
2693 mem.bufferOffset = 0;
2694
2695 this->capacity = capacity;
2696 head = 0;
2697
2698 return true;
2699}
2700
2701void QD3D12StagingArea::destroy()
2702{
2703 if (resource) {
2704 resource->Release();
2705 resource = nullptr;
2706 }
2707 if (allocation) {
2708 allocation->Release();
2709 allocation = nullptr;
2710 }
2711 mem = {};
2712}
2713
2714void QD3D12StagingArea::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2715{
2716 if (resource)
2717 releaseQueue->deferredReleaseResourceAndAllocation(resource, allocation);
2718 mem = {};
2719}
2720
2721QD3D12StagingArea::Allocation QD3D12StagingArea::get(quint32 byteSize)
2722{
2723 const quint32 allocSize = aligned(byteSize, ALIGNMENT);
2724 if (head + allocSize > capacity) {
2725 qWarning("Failed to allocate %u (%u) bytes from staging area of size %u with %u bytes left",
2726 allocSize, byteSize, capacity, remainingCapacity());
2727 return {};
2728 }
2729 const quint32 offset = head;
2730 head += allocSize;
2731 return {
2732 mem.p + offset,
2733 mem.gpuAddr + offset,
2734 mem.buffer,
2735 offset
2736 };
2737}
2738
2739// Can be called inside and outside of begin-endFrame. Removes from the pool
2740// and releases the underlying native resource only in the frames_in_flight'th
2741// beginFrame() counted starting from the next endFrame().
2742void QD3D12ReleaseQueue::deferredReleaseResource(const QD3D12ObjectHandle &handle)
2743{
2744 DeferredReleaseEntry e;
2745 e.handle = handle;
2746 queue.append(e);
2747}
2748
2749void QD3D12ReleaseQueue::deferredReleaseResourceWithViews(const QD3D12ObjectHandle &handle,
2750 QD3D12CpuDescriptorPool *pool,
2751 const QD3D12Descriptor &viewsStart,
2752 int viewCount)
2753{
2754 DeferredReleaseEntry e;
2755 e.type = DeferredReleaseEntry::Resource;
2756 e.handle = handle;
2757 e.poolForViews = pool;
2758 e.viewsStart = viewsStart;
2759 e.viewCount = viewCount;
2760 queue.append(e);
2761}
2762
2763void QD3D12ReleaseQueue::deferredReleasePipeline(const QD3D12ObjectHandle &handle)
2764{
2765 DeferredReleaseEntry e;
2766 e.type = DeferredReleaseEntry::Pipeline;
2767 e.handle = handle;
2768 queue.append(e);
2769}
2770
2771void QD3D12ReleaseQueue::deferredReleaseRootSignature(const QD3D12ObjectHandle &handle)
2772{
2773 DeferredReleaseEntry e;
2774 e.type = DeferredReleaseEntry::RootSignature;
2775 e.handle = handle;
2776 queue.append(e);
2777}
2778
2779void QD3D12ReleaseQueue::deferredReleaseCallback(std::function<void(void*)> callback, void *userData)
2780{
2781 DeferredReleaseEntry e;
2782 e.type = DeferredReleaseEntry::Callback;
2783 e.callback = callback;
2784 e.callbackUserData = userData;
2785 queue.append(e);
2786}
2787
2788void QD3D12ReleaseQueue::deferredReleaseResourceAndAllocation(ID3D12Resource *resource,
2789 D3D12MA::Allocation *allocation)
2790{
2791 DeferredReleaseEntry e;
2792 e.type = DeferredReleaseEntry::ResourceAndAllocation;
2793 e.resourceAndAllocation = { resource, allocation };
2794 queue.append(e);
2795}
2796
2797void QD3D12ReleaseQueue::deferredReleaseDescriptorHeap(ID3D12DescriptorHeap *heap)
2798{
2799 DeferredReleaseEntry e;
2800 e.type = DeferredReleaseEntry::DescriptorHeap;
2801 e.descriptorHeap = heap;
2802 queue.append(e);
2803}
2804
2805void QD3D12ReleaseQueue::deferredReleaseViews(QD3D12CpuDescriptorPool *pool,
2806 const QD3D12Descriptor &viewsStart,
2807 int viewCount)
2808{
2809 DeferredReleaseEntry e;
2810 e.type = DeferredReleaseEntry::Views;
2811 e.poolForViews = pool;
2812 e.viewsStart = viewsStart;
2813 e.viewCount = viewCount;
2814 queue.append(e);
2815}
2816
2817void QD3D12ReleaseQueue::activatePendingDeferredReleaseRequests(int frameSlot)
2818{
2819 for (DeferredReleaseEntry &e : queue) {
2820 if (!e.frameSlotToBeReleasedIn.has_value())
2821 e.frameSlotToBeReleasedIn = frameSlot;
2822 }
2823}
2824
2825void QD3D12ReleaseQueue::executeDeferredReleases(int frameSlot, bool forced)
2826{
2827 for (int i = queue.count() - 1; i >= 0; --i) {
2828 const DeferredReleaseEntry &e(queue[i]);
2829 if (forced || (e.frameSlotToBeReleasedIn.has_value() && e.frameSlotToBeReleasedIn.value() == frameSlot)) {
2830 switch (e.type) {
2831 case DeferredReleaseEntry::Resource:
2832 resourcePool->remove(e.handle);
2833 if (e.poolForViews && e.viewsStart.isValid() && e.viewCount > 0)
2834 e.poolForViews->release(e.viewsStart, e.viewCount);
2835 break;
2836 case DeferredReleaseEntry::Pipeline:
2837 pipelinePool->remove(e.handle);
2838 break;
2839 case DeferredReleaseEntry::RootSignature:
2840 rootSignaturePool->remove(e.handle);
2841 break;
2842 case DeferredReleaseEntry::Callback:
2843 e.callback(e.callbackUserData);
2844 break;
2845 case DeferredReleaseEntry::ResourceAndAllocation:
2846 // order matters: resource first, then the allocation (which
2847 // may be null)
2848 e.resourceAndAllocation.first->Release();
2849 if (e.resourceAndAllocation.second)
2850 e.resourceAndAllocation.second->Release();
2851 break;
2852 case DeferredReleaseEntry::DescriptorHeap:
2853 e.descriptorHeap->Release();
2854 break;
2855 case DeferredReleaseEntry::Views:
2856 e.poolForViews->release(e.viewsStart, e.viewCount);
2857 break;
2858 }
2859 queue.removeAt(i);
2860 }
2861 }
2862}
2863
2864void QD3D12ReleaseQueue::releaseAll()
2865{
2866 executeDeferredReleases(0, true);
2867}
2868
2869void QD3D12ResourceBarrierGenerator::addTransitionBarrier(const QD3D12ObjectHandle &resourceHandle,
2870 D3D12_RESOURCE_STATES stateAfter)
2871{
2872 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2873 if (stateAfter != res->state) {
2874 transitionResourceBarriers.append({ resourceHandle, res->state, stateAfter });
2875 res->state = stateAfter;
2876 }
2877 }
2878}
2879
2880void QD3D12ResourceBarrierGenerator::enqueueBufferedTransitionBarriers(QD3D12CommandBuffer *cbD)
2881{
2882 QVarLengthArray<D3D12_RESOURCE_BARRIER, PREALLOC> barriers;
2883 for (const TransitionResourceBarrier &trb : transitionResourceBarriers) {
2884 if (QD3D12Resource *res = resourcePool->lookupRef(trb.resourceHandle)) {
2885 D3D12_RESOURCE_BARRIER barrier = {};
2886 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
2887 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2888 barrier.Transition.pResource = res->resource;
2889 barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
2890 barrier.Transition.StateBefore = trb.stateBefore;
2891 barrier.Transition.StateAfter = trb.stateAfter;
2892 barriers.append(barrier);
2893 }
2894 }
2895 transitionResourceBarriers.clear();
2896 if (!barriers.isEmpty())
2897 cbD->cmdList->ResourceBarrier(barriers.count(), barriers.constData());
2898}
2899
2900void QD3D12ResourceBarrierGenerator::enqueueSubresourceTransitionBarrier(QD3D12CommandBuffer *cbD,
2901 const QD3D12ObjectHandle &resourceHandle,
2902 UINT subresource,
2903 D3D12_RESOURCE_STATES stateBefore,
2904 D3D12_RESOURCE_STATES stateAfter)
2905{
2906 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2907 D3D12_RESOURCE_BARRIER barrier = {};
2908 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
2909 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2910 barrier.Transition.pResource = res->resource;
2911 barrier.Transition.Subresource = subresource;
2912 barrier.Transition.StateBefore = stateBefore;
2913 barrier.Transition.StateAfter = stateAfter;
2914 cbD->cmdList->ResourceBarrier(1, &barrier);
2915 }
2916}
2917
2918void QD3D12ResourceBarrierGenerator::enqueueUavBarrier(QD3D12CommandBuffer *cbD,
2919 const QD3D12ObjectHandle &resourceHandle)
2920{
2921 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2922 D3D12_RESOURCE_BARRIER barrier = {};
2923 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
2924 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2925 barrier.UAV.pResource = res->resource;
2926 cbD->cmdList->ResourceBarrier(1, &barrier);
2927 }
2928}
2929
2930void QD3D12ShaderBytecodeCache::insertWithCapacityLimit(const QRhiShaderStage &key, const Shader &s)
2931{
2932 if (data.count() >= QRhiD3D12::MAX_SHADER_CACHE_ENTRIES)
2933 data.clear();
2934 data.insert(key, s);
2935}
2936
2937bool QD3D12ShaderVisibleDescriptorHeap::create(ID3D12Device *device,
2938 D3D12_DESCRIPTOR_HEAP_TYPE type,
2939 quint32 perFrameDescriptorCount)
2940{
2941 Q_ASSERT(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
2942
2943 quint32 size = perFrameDescriptorCount * QD3D12_FRAMES_IN_FLIGHT;
2944
2945 // https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-support
2946 const quint32 CBV_SRV_UAV_MAX = 1000000;
2947 const quint32 SAMPLER_MAX = 2048;
2948 if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
2949 size = qMin(size, CBV_SRV_UAV_MAX);
2950 else if (type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)
2951 size = qMin(size, SAMPLER_MAX);
2952
2953 if (!heap.create(device, size, type, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) {
2954 qWarning("Failed to create shader-visible descriptor heap of size %u", size);
2955 return false;
2956 }
2957
2958 perFrameDescriptorCount = size / QD3D12_FRAMES_IN_FLIGHT;
2959 quint32 currentOffsetInDescriptors = 0;
2960 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
2961 perFrameHeapSlice[i].createWithExisting(heap, currentOffsetInDescriptors, perFrameDescriptorCount);
2962 currentOffsetInDescriptors += perFrameDescriptorCount;
2963 }
2964
2965 return true;
2966}
2967
2968void QD3D12ShaderVisibleDescriptorHeap::destroy()
2969{
2970 heap.destroy();
2971}
2972
2973void QD3D12ShaderVisibleDescriptorHeap::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2974{
2975 heap.destroyWithDeferredRelease(releaseQueue);
2976}
2977
2978static inline std::pair<int, int> mapBinding(int binding, const QShader::NativeResourceBindingMap &map)
2979{
2980 if (map.isEmpty())
2981 return { binding, binding }; // assume 1:1 mapping
2982
2983 auto it = map.constFind(binding);
2984 if (it != map.cend())
2985 return *it;
2986
2987 // Hitting this path is normal too. It is not given that the resource is
2988 // present in the shaders for all the stages specified by the visibility
2989 // mask in the QRhiShaderResourceBinding.
2990 return { -1, -1 };
2991}
2992
2993void QD3D12ShaderResourceVisitor::visit()
2994{
2995 for (int bindingIdx = 0, bindingCount = srb->m_bindings.count(); bindingIdx != bindingCount; ++bindingIdx) {
2996 const QRhiShaderResourceBinding &b(srb->m_bindings[bindingIdx]);
2997 const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b);
2998
2999 for (int stageIdx = 0; stageIdx < stageCount; ++stageIdx) {
3000 const QD3D12ShaderStageData *sd = &stageData[stageIdx];
3001 if (!sd->valid)
3002 continue;
3003
3004 if (!bd->stage.testFlag(qd3d12_stageToSrb(sd->stage)))
3005 continue;
3006
3007 switch (bd->type) {
3008 case QRhiShaderResourceBinding::UniformBuffer:
3009 {
3010 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3011 if (shaderRegister >= 0 && uniformBuffer)
3012 uniformBuffer(sd->stage, bd->u.ubuf, shaderRegister, bd->binding);
3013 }
3014 break;
3015 case QRhiShaderResourceBinding::SampledTexture:
3016 {
3017 Q_ASSERT(bd->u.stex.count > 0);
3018 const int textureBaseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3019 const int samplerBaseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).second;
3020 for (int i = 0; i < bd->u.stex.count; ++i) {
3021 if (textureBaseShaderRegister >= 0 && texture)
3022 texture(sd->stage, bd->u.stex.texSamplers[i], textureBaseShaderRegister + i);
3023 if (samplerBaseShaderRegister >= 0 && sampler)
3024 sampler(sd->stage, bd->u.stex.texSamplers[i], samplerBaseShaderRegister + i);
3025 }
3026 }
3027 break;
3028 case QRhiShaderResourceBinding::Texture:
3029 {
3030 Q_ASSERT(bd->u.stex.count > 0);
3031 const int baseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3032 if (baseShaderRegister >= 0 && texture) {
3033 for (int i = 0; i < bd->u.stex.count; ++i)
3034 texture(sd->stage, bd->u.stex.texSamplers[i], baseShaderRegister + i);
3035 }
3036 }
3037 break;
3038 case QRhiShaderResourceBinding::Sampler:
3039 {
3040 Q_ASSERT(bd->u.stex.count > 0);
3041 const int baseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3042 if (baseShaderRegister >= 0 && sampler) {
3043 for (int i = 0; i < bd->u.stex.count; ++i)
3044 sampler(sd->stage, bd->u.stex.texSamplers[i], baseShaderRegister + i);
3045 }
3046 }
3047 break;
3048 case QRhiShaderResourceBinding::ImageLoad:
3049 {
3050 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3051 if (shaderRegister >= 0 && storageImage)
3052 storageImage(sd->stage, bd->u.simage, Load, shaderRegister);
3053 }
3054 break;
3055 case QRhiShaderResourceBinding::ImageStore:
3056 {
3057 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3058 if (shaderRegister >= 0 && storageImage)
3059 storageImage(sd->stage, bd->u.simage, Store, shaderRegister);
3060 }
3061 break;
3062 case QRhiShaderResourceBinding::ImageLoadStore:
3063 {
3064 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3065 if (shaderRegister >= 0 && storageImage)
3066 storageImage(sd->stage, bd->u.simage, LoadStore, shaderRegister);
3067 }
3068 break;
3069 case QRhiShaderResourceBinding::BufferLoad:
3070 {
3071 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3072 if (shaderRegister >= 0 && storageBuffer)
3073 storageBuffer(sd->stage, bd->u.sbuf, Load, shaderRegister);
3074 }
3075 break;
3076 case QRhiShaderResourceBinding::BufferStore:
3077 {
3078 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3079 if (shaderRegister >= 0 && storageBuffer)
3080 storageBuffer(sd->stage, bd->u.sbuf, Store, shaderRegister);
3081 }
3082 break;
3083 case QRhiShaderResourceBinding::BufferLoadStore:
3084 {
3085 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3086 if (shaderRegister >= 0 && storageBuffer)
3087 storageBuffer(sd->stage, bd->u.sbuf, LoadStore, shaderRegister);
3088 }
3089 break;
3090 }
3091 }
3092 }
3093}
3094
3095bool QD3D12SamplerManager::create(ID3D12Device *device)
3096{
3097 // This does not need to be per-frame slot, just grab space for MAX_SAMPLERS samplers.
3098 if (!shaderVisibleSamplerHeap.create(device,
3099 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
3100 MAX_SAMPLERS / QD3D12_FRAMES_IN_FLIGHT))
3101 {
3102 qWarning("Could not create shader-visible SAMPLER heap");
3103 return false;
3104 }
3105
3106 this->device = device;
3107 return true;
3108}
3109
3110void QD3D12SamplerManager::destroy()
3111{
3112 if (device) {
3113 shaderVisibleSamplerHeap.destroy();
3114 device = nullptr;
3115 }
3116}
3117
3118QD3D12Descriptor QD3D12SamplerManager::getShaderVisibleDescriptor(const D3D12_SAMPLER_DESC &desc)
3119{
3120 auto it = gpuMap.constFind({desc});
3121 if (it != gpuMap.cend())
3122 return *it;
3123
3124 QD3D12Descriptor descriptor = shaderVisibleSamplerHeap.heap.get(1);
3125 if (descriptor.isValid()) {
3126 device->CreateSampler(&desc, descriptor.cpuHandle);
3127 gpuMap.insert({desc}, descriptor);
3128 } else {
3129 qWarning("Out of shader-visible SAMPLER descriptor heap space,"
3130 " this should not happen, maximum number of unique samplers is %u",
3131 shaderVisibleSamplerHeap.heap.capacity);
3132 }
3133
3134 return descriptor;
3135}
3136
3137bool QD3D12MipmapGenerator::create(QRhiD3D12 *rhiD)
3138{
3139 this->rhiD = rhiD;
3140
3141 D3D12_ROOT_PARAMETER1 rootParams[3] = {};
3142 D3D12_DESCRIPTOR_RANGE1 descriptorRanges[2] = {};
3143
3144 // b0
3145 rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
3146 rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3147 rootParams[0].Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
3148
3149 // t0
3150 descriptorRanges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
3151 descriptorRanges[0].NumDescriptors = 1;
3152 descriptorRanges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
3153 rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3154 rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3155 rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
3156 rootParams[1].DescriptorTable.pDescriptorRanges = &descriptorRanges[0];
3157
3158 // u0..3
3159 descriptorRanges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
3160 descriptorRanges[1].NumDescriptors = 4;
3161 rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3162 rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3163 rootParams[2].DescriptorTable.NumDescriptorRanges = 1;
3164 rootParams[2].DescriptorTable.pDescriptorRanges = &descriptorRanges[1];
3165
3166 // s0
3167 D3D12_STATIC_SAMPLER_DESC samplerDesc = {};
3168 samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
3169 samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3170 samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3171 samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3172 samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3173
3174 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
3175 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
3176 rsDesc.Desc_1_1.NumParameters = 3;
3177 rsDesc.Desc_1_1.pParameters = rootParams;
3178 rsDesc.Desc_1_1.NumStaticSamplers = 1;
3179 rsDesc.Desc_1_1.pStaticSamplers = &samplerDesc;
3180
3181 ID3DBlob *signature = nullptr;
3182 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
3183 if (FAILED(hr)) {
3184 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
3185 return false;
3186 }
3187 ID3D12RootSignature *rootSig = nullptr;
3188 hr = rhiD->dev->CreateRootSignature(0,
3189 signature->GetBufferPointer(),
3190 signature->GetBufferSize(),
3191 __uuidof(ID3D12RootSignature),
3192 reinterpret_cast<void **>(&rootSig));
3193 signature->Release();
3194 if (FAILED(hr)) {
3195 qWarning("Failed to create root signature: %s",
3196 qPrintable(QSystemError::windowsComString(hr)));
3197 return false;
3198 }
3199
3200 rootSigHandle = QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
3201
3202 D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
3203 psoDesc.pRootSignature = rootSig;
3204 psoDesc.CS.pShaderBytecode = g_csMipmap;
3205 psoDesc.CS.BytecodeLength = sizeof(g_csMipmap);
3206 ID3D12PipelineState *pso = nullptr;
3207 hr = rhiD->dev->CreateComputePipelineState(&psoDesc,
3208 __uuidof(ID3D12PipelineState),
3209 reinterpret_cast<void **>(&pso));
3210 if (FAILED(hr)) {
3211 qWarning("Failed to create compute pipeline state: %s",
3212 qPrintable(QSystemError::windowsComString(hr)));
3213 rhiD->rootSignaturePool.remove(rootSigHandle);
3214 rootSigHandle = {};
3215 return false;
3216 }
3217
3218 pipelineHandle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
3219
3220 return true;
3221}
3222
3223void QD3D12MipmapGenerator::destroy()
3224{
3225 rhiD->pipelinePool.remove(pipelineHandle);
3226 pipelineHandle = {};
3227 rhiD->rootSignaturePool.remove(rootSigHandle);
3228 rootSigHandle = {};
3229}
3230
3231void QD3D12MipmapGenerator::generate(QD3D12CommandBuffer *cbD, const QD3D12ObjectHandle &textureHandle)
3232{
3233 QD3D12Pipeline *pipeline = rhiD->pipelinePool.lookupRef(pipelineHandle);
3234 if (!pipeline)
3235 return;
3236 QD3D12RootSignature *rootSig = rhiD->rootSignaturePool.lookupRef(rootSigHandle);
3237 if (!rootSig)
3238 return;
3239 QD3D12Resource *res = rhiD->resourcePool.lookupRef(textureHandle);
3240 if (!res)
3241 return;
3242
3243 const quint32 mipLevelCount = res->desc.MipLevels;
3244 if (mipLevelCount < 2)
3245 return;
3246
3247 if (res->desc.SampleDesc.Count > 1) {
3248 qWarning("Cannot generate mipmaps for MSAA texture");
3249 return;
3250 }
3251
3252 const bool is1D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D;
3253 if (is1D) {
3254 qWarning("Cannot generate mipmaps for 1D texture");
3255 return;
3256 }
3257
3258 const bool is3D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D;
3259 const bool isCubeOrArray = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D
3260 && res->desc.DepthOrArraySize > 1;
3261 const quint32 layerCount = isCubeOrArray ? res->desc.DepthOrArraySize : 1;
3262
3263 if (is3D) {
3264 qWarning("2D mipmap generator invoked for 3D texture, this should not happen");
3265 return;
3266 }
3267
3268 rhiD->barrierGen.addTransitionBarrier(textureHandle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3269 rhiD->barrierGen.enqueueBufferedTransitionBarriers(cbD);
3270
3271 cbD->cmdList->SetPipelineState(pipeline->pso);
3272 cbD->cmdList->SetComputeRootSignature(rootSig->rootSig);
3273
3274 const quint32 descriptorByteSize = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].descriptorByteSize;
3275
3276 struct CBufData {
3277 quint32 srcMipLevel;
3278 quint32 numMipLevels;
3279 float texelWidth;
3280 float texelHeight;
3281 };
3282
3283 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(sizeof(CBufData), mipLevelCount * layerCount);
3284 std::optional<QD3D12StagingArea> ownStagingArea;
3285 if (rhiD->smallStagingAreas[rhiD->currentFrameSlot].remainingCapacity() < allocSize) {
3286 ownStagingArea = QD3D12StagingArea();
3287 if (!ownStagingArea->create(rhiD, allocSize, D3D12_HEAP_TYPE_UPLOAD)) {
3288 qWarning("Could not create staging area for mipmap generation");
3289 return;
3290 }
3291 }
3292 QD3D12StagingArea *workArea = ownStagingArea.has_value()
3293 ? &ownStagingArea.value()
3294 : &rhiD->smallStagingAreas[rhiD->currentFrameSlot];
3295
3296 bool gotNewHeap = false;
3297 if (!rhiD->ensureShaderVisibleDescriptorHeapCapacity(&rhiD->shaderVisibleCbvSrvUavHeap,
3298 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
3299 rhiD->currentFrameSlot,
3300 (1 + 4) * mipLevelCount * layerCount,
3301 &gotNewHeap))
3302 {
3303 qWarning("Could not ensure enough space in descriptor heap for mipmap generation");
3304 return;
3305 }
3306 if (gotNewHeap)
3307 rhiD->bindShaderVisibleHeaps(cbD);
3308
3309 for (quint32 layer = 0; layer < layerCount; ++layer) {
3310 for (quint32 level = 0; level < mipLevelCount ;) {
3311 UINT subresource = calcSubresource(level, layer, res->desc.MipLevels);
3312 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3313 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3314 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
3315
3316 quint32 levelPlusOneMipWidth = res->desc.Width >> (level + 1);
3317 quint32 levelPlusOneMipHeight = res->desc.Height >> (level + 1);
3318 const quint32 dw = levelPlusOneMipWidth == 1 ? levelPlusOneMipHeight : levelPlusOneMipWidth;
3319 const quint32 dh = levelPlusOneMipHeight == 1 ? levelPlusOneMipWidth : levelPlusOneMipHeight;
3320 // number of times the size can be halved while still resulting in an even dimension
3321 const quint32 additionalMips = qCountTrailingZeroBits(dw | dh);
3322 const quint32 numGenMips = qMin(1u + qMin(3u, additionalMips), res->desc.MipLevels - level);
3323 levelPlusOneMipWidth = qMax(1u, levelPlusOneMipWidth);
3324 levelPlusOneMipHeight = qMax(1u, levelPlusOneMipHeight);
3325
3326 CBufData cbufData = {
3327 level,
3328 numGenMips,
3329 1.0f / float(levelPlusOneMipWidth),
3330 1.0f / float(levelPlusOneMipHeight)
3331 };
3332
3333 QD3D12StagingArea::Allocation cbuf = workArea->get(sizeof(cbufData));
3334 memcpy(cbuf.p, &cbufData, sizeof(cbufData));
3335 cbD->cmdList->SetComputeRootConstantBufferView(0, cbuf.gpuAddr);
3336
3337 QD3D12Descriptor srv = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3338 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
3339 srvDesc.Format = res->desc.Format;
3340 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
3341 if (isCubeOrArray) {
3342 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
3343 srvDesc.Texture2DArray.MipLevels = res->desc.MipLevels;
3344 srvDesc.Texture2DArray.FirstArraySlice = layer;
3345 srvDesc.Texture2DArray.ArraySize = 1;
3346 } else {
3347 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
3348 srvDesc.Texture2D.MipLevels = res->desc.MipLevels;
3349 }
3350 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
3351 cbD->cmdList->SetComputeRootDescriptorTable(1, srv.gpuHandle);
3352
3353 QD3D12Descriptor uavStart = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(4);
3354 D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = uavStart.cpuHandle;
3355 // if level is N, then need UAVs for levels N+1, ..., N+4
3356 for (quint32 uavIdx = 0; uavIdx < 4; ++uavIdx) {
3357 const quint32 uavMipLevel = qMin(level + 1u + uavIdx, res->desc.MipLevels - 1u);
3358 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
3359 uavDesc.Format = res->desc.Format;
3360 if (isCubeOrArray) {
3361 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
3362 uavDesc.Texture2DArray.MipSlice = uavMipLevel;
3363 uavDesc.Texture2DArray.FirstArraySlice = layer;
3364 uavDesc.Texture2DArray.ArraySize = 1;
3365 } else {
3366 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
3367 uavDesc.Texture2D.MipSlice = uavMipLevel;
3368 }
3369 rhiD->dev->CreateUnorderedAccessView(res->resource, nullptr, &uavDesc, uavCpuHandle);
3370 uavCpuHandle.ptr += descriptorByteSize;
3371 }
3372 cbD->cmdList->SetComputeRootDescriptorTable(2, uavStart.gpuHandle);
3373
3374 cbD->cmdList->Dispatch(levelPlusOneMipWidth, levelPlusOneMipHeight, 1);
3375
3376 rhiD->barrierGen.enqueueUavBarrier(cbD, textureHandle);
3377 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3378 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
3379 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3380
3381 level += numGenMips;
3382 }
3383 }
3384
3385 if (ownStagingArea.has_value())
3386 ownStagingArea->destroyWithDeferredRelease(&rhiD->releaseQueue);
3387}
3388
3389bool QD3D12MipmapGenerator3D::create(QRhiD3D12 *rhiD)
3390{
3391 this->rhiD = rhiD;
3392
3393 D3D12_ROOT_PARAMETER1 rootParams[3] = {};
3394 D3D12_DESCRIPTOR_RANGE1 descriptorRanges[2] = {};
3395
3396 // b0
3397 rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
3398 rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3399 rootParams[0].Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
3400
3401 // t0
3402 descriptorRanges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
3403 descriptorRanges[0].NumDescriptors = 1;
3404 descriptorRanges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
3405 rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3406 rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3407 rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
3408 rootParams[1].DescriptorTable.pDescriptorRanges = &descriptorRanges[0];
3409
3410 // u0
3411 descriptorRanges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
3412 descriptorRanges[1].NumDescriptors = 1;
3413 rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3414 rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3415 rootParams[2].DescriptorTable.NumDescriptorRanges = 1;
3416 rootParams[2].DescriptorTable.pDescriptorRanges = &descriptorRanges[1];
3417
3418 // s0
3419 D3D12_STATIC_SAMPLER_DESC samplerDesc = {};
3420 samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
3421 samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3422 samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3423 samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3424 samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3425
3426 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
3427 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
3428 rsDesc.Desc_1_1.NumParameters = 3;
3429 rsDesc.Desc_1_1.pParameters = rootParams;
3430 rsDesc.Desc_1_1.NumStaticSamplers = 1;
3431 rsDesc.Desc_1_1.pStaticSamplers = &samplerDesc;
3432
3433 ID3DBlob *signature = nullptr;
3434 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
3435 if (FAILED(hr)) {
3436 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
3437 return false;
3438 }
3439 ID3D12RootSignature *rootSig = nullptr;
3440 hr = rhiD->dev->CreateRootSignature(0,
3441 signature->GetBufferPointer(),
3442 signature->GetBufferSize(),
3443 __uuidof(ID3D12RootSignature),
3444 reinterpret_cast<void **>(&rootSig));
3445 signature->Release();
3446 if (FAILED(hr)) {
3447 qWarning("Failed to create root signature: %s",
3448 qPrintable(QSystemError::windowsComString(hr)));
3449 return false;
3450 }
3451
3452 rootSigHandle = QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
3453
3454 D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
3455 psoDesc.pRootSignature = rootSig;
3456 psoDesc.CS.pShaderBytecode = g_csMipmap3D;
3457 psoDesc.CS.BytecodeLength = sizeof(g_csMipmap3D);
3458 ID3D12PipelineState *pso = nullptr;
3459 hr = rhiD->dev->CreateComputePipelineState(&psoDesc,
3460 __uuidof(ID3D12PipelineState),
3461 reinterpret_cast<void **>(&pso));
3462 if (FAILED(hr)) {
3463 qWarning("Failed to create compute pipeline state: %s",
3464 qPrintable(QSystemError::windowsComString(hr)));
3465 rhiD->rootSignaturePool.remove(rootSigHandle);
3466 rootSigHandle = {};
3467 return false;
3468 }
3469
3470 pipelineHandle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
3471
3472 return true;
3473}
3474
3475void QD3D12MipmapGenerator3D::destroy()
3476{
3477 rhiD->pipelinePool.remove(pipelineHandle);
3478 pipelineHandle = {};
3479 rhiD->rootSignaturePool.remove(rootSigHandle);
3480 rootSigHandle = {};
3481}
3482
3483void QD3D12MipmapGenerator3D::generate(QD3D12CommandBuffer *cbD, const QD3D12ObjectHandle &textureHandle)
3484{
3485 QD3D12Pipeline *pipeline = rhiD->pipelinePool.lookupRef(pipelineHandle);
3486 if (!pipeline)
3487 return;
3488 QD3D12RootSignature *rootSig = rhiD->rootSignaturePool.lookupRef(rootSigHandle);
3489 if (!rootSig)
3490 return;
3491 QD3D12Resource *res = rhiD->resourcePool.lookupRef(textureHandle);
3492 if (!res)
3493 return;
3494
3495 const quint32 mipLevelCount = res->desc.MipLevels;
3496 if (mipLevelCount < 2)
3497 return;
3498
3499 const bool is3D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D;
3500 if (!is3D) {
3501 qWarning("3D mipmap generator invoked for non-3D texture, this should not happen");
3502 return;
3503 }
3504
3505 rhiD->barrierGen.addTransitionBarrier(textureHandle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3506 rhiD->barrierGen.enqueueBufferedTransitionBarriers(cbD);
3507
3508 cbD->cmdList->SetPipelineState(pipeline->pso);
3509 cbD->cmdList->SetComputeRootSignature(rootSig->rootSig);
3510
3511 const quint32 descriptorByteSize = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].descriptorByteSize;
3512
3513 struct CBufData {
3514 float texelWidth;
3515 float texelHeight;
3516 float texelDepth;
3517 quint32 srcMipLevel;
3518 };
3519
3520 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(sizeof(CBufData), mipLevelCount);
3521 std::optional<QD3D12StagingArea> ownStagingArea;
3522 if (rhiD->smallStagingAreas[rhiD->currentFrameSlot].remainingCapacity() < allocSize) {
3523 ownStagingArea = QD3D12StagingArea();
3524 if (!ownStagingArea->create(rhiD, allocSize, D3D12_HEAP_TYPE_UPLOAD)) {
3525 qWarning("Could not create staging area for mipmap generation");
3526 return;
3527 }
3528 }
3529 QD3D12StagingArea *workArea = ownStagingArea.has_value()
3530 ? &ownStagingArea.value()
3531 : &rhiD->smallStagingAreas[rhiD->currentFrameSlot];
3532
3533 bool gotNewHeap = false;
3534 if (!rhiD->ensureShaderVisibleDescriptorHeapCapacity(&rhiD->shaderVisibleCbvSrvUavHeap,
3535 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
3536 rhiD->currentFrameSlot,
3537 (1 + 1) * mipLevelCount, // 1 SRV + 1 UAV
3538 &gotNewHeap))
3539 {
3540 qWarning("Could not ensure enough space in descriptor heap for mipmap generation");
3541 return;
3542 }
3543 if (gotNewHeap)
3544 rhiD->bindShaderVisibleHeaps(cbD);
3545
3546 for (quint32 level = 0; level < mipLevelCount; ++level) {
3547 UINT subresource = calcSubresource(level, 0u, res->desc.MipLevels);
3548 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3549 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3550 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
3551
3552 quint32 levelPlusOneMipWidth = qMax<quint32>(1, res->desc.Width >> (level + 1));
3553 quint32 levelPlusOneMipHeight = qMax<quint32>(1, res->desc.Height >> (level + 1));
3554 quint32 levelPlusOneMipDepth = qMax<quint32>(1, res->desc.DepthOrArraySize >> (level + 1));
3555
3556 CBufData cbufData = {
3557 1.0f / float(levelPlusOneMipWidth),
3558 1.0f / float(levelPlusOneMipHeight),
3559 1.0f / float(levelPlusOneMipDepth),
3560 quint32(level)
3561 };
3562
3563 QD3D12StagingArea::Allocation cbuf = workArea->get(sizeof(cbufData));
3564 memcpy(cbuf.p, &cbufData, sizeof(cbufData));
3565 cbD->cmdList->SetComputeRootConstantBufferView(0, cbuf.gpuAddr);
3566
3567 QD3D12Descriptor srv = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3568 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
3569 srvDesc.Format = res->desc.Format;
3570 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
3571 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
3572 srvDesc.Texture3D.MipLevels = res->desc.MipLevels;
3573
3574 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
3575 cbD->cmdList->SetComputeRootDescriptorTable(1, srv.gpuHandle);
3576
3577 QD3D12Descriptor uavStart = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3578 D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = uavStart.cpuHandle;
3579 const quint32 uavMipLevel = qMin(level + 1u, res->desc.MipLevels - 1u);
3580 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
3581 uavDesc.Format = res->desc.Format;
3582 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
3583 uavDesc.Texture3D.MipSlice = uavMipLevel;
3584 uavDesc.Texture3D.WSize = UINT(-1);
3585 rhiD->dev->CreateUnorderedAccessView(res->resource, nullptr, &uavDesc, uavCpuHandle);
3586 uavCpuHandle.ptr += descriptorByteSize;
3587 cbD->cmdList->SetComputeRootDescriptorTable(2, uavStart.gpuHandle);
3588
3589 cbD->cmdList->Dispatch(levelPlusOneMipWidth, levelPlusOneMipHeight, levelPlusOneMipDepth);
3590
3591 rhiD->barrierGen.enqueueUavBarrier(cbD, textureHandle);
3592 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3593 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
3594 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3595 }
3596
3597 if (ownStagingArea.has_value())
3598 ownStagingArea->destroyWithDeferredRelease(&rhiD->releaseQueue);
3599}
3600
3601bool QD3D12MemoryAllocator::create(ID3D12Device *device, IDXGIAdapter1 *adapter)
3602{
3603 this->device = device;
3604
3605 // We can function with and without D3D12MA: CreateCommittedResource is
3606 // just fine for our purposes and not any complicated API-wise; the memory
3607 // allocator is interesting for efficiency mainly since it can suballocate
3608 // instead of making everything a committed resource allocation.
3609
3610 static bool disableMA = qEnvironmentVariableIntValue("QT_D3D_NO_SUBALLOC");
3611 if (disableMA)
3612 return true;
3613
3614 DXGI_ADAPTER_DESC1 desc;
3615 adapter->GetDesc1(&desc);
3616 if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
3617 return true;
3618
3619 D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
3620 allocatorDesc.pDevice = device;
3621 allocatorDesc.pAdapter = adapter;
3622 // A QRhi is supposed to be used from one single thread only. Disable
3623 // the allocator's own mutexes. This may give a performance boost.
3624 allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED;
3625 HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
3626 if (FAILED(hr)) {
3627 qWarning("Failed to initialize D3D12 Memory Allocator: %s",
3628 qPrintable(QSystemError::windowsComString(hr)));
3629 return false;
3630 }
3631 return true;
3632}
3633
3634void QD3D12MemoryAllocator::destroy()
3635{
3636 if (allocator) {
3637 allocator->Release();
3638 allocator = nullptr;
3639 }
3640}
3641
3642HRESULT QD3D12MemoryAllocator::createResource(D3D12_HEAP_TYPE heapType,
3643 const D3D12_RESOURCE_DESC *resourceDesc,
3644 D3D12_RESOURCE_STATES initialState,
3645 const D3D12_CLEAR_VALUE *optimizedClearValue,
3646 D3D12MA::Allocation **maybeAllocation,
3647 REFIID riidResource,
3648 void **ppvResource)
3649{
3650 if (allocator) {
3651 D3D12MA::ALLOCATION_DESC allocDesc = {};
3652 allocDesc.HeapType = heapType;
3653 return allocator->CreateResource(&allocDesc,
3654 resourceDesc,
3655 initialState,
3656 optimizedClearValue,
3657 maybeAllocation,
3658 riidResource,
3659 ppvResource);
3660 } else {
3661 *maybeAllocation = nullptr;
3662 D3D12_HEAP_PROPERTIES heapProps = {};
3663 heapProps.Type = heapType;
3664 return device->CreateCommittedResource(&heapProps,
3665 D3D12_HEAP_FLAG_NONE,
3666 resourceDesc,
3667 initialState,
3668 optimizedClearValue,
3669 riidResource,
3670 ppvResource);
3671 }
3672}
3673
3674void QD3D12MemoryAllocator::getBudget(D3D12MA::Budget *localBudget, D3D12MA::Budget *nonLocalBudget)
3675{
3676 if (allocator) {
3677 allocator->GetBudget(localBudget, nonLocalBudget);
3678 } else {
3679 *localBudget = {};
3680 *nonLocalBudget = {};
3681 }
3682}
3683
3684void QRhiD3D12::waitGpu()
3685{
3686 fullFenceCounter += 1u;
3687 if (SUCCEEDED(cmdQueue->Signal(fullFence, fullFenceCounter))) {
3688 if (SUCCEEDED(fullFence->SetEventOnCompletion(fullFenceCounter, fullFenceEvent)))
3689 WaitForSingleObject(fullFenceEvent, INFINITE);
3690 }
3691}
3692
3693DXGI_SAMPLE_DESC QRhiD3D12::effectiveSampleDesc(int sampleCount, DXGI_FORMAT format) const
3694{
3695 DXGI_SAMPLE_DESC desc;
3696 desc.Count = 1;
3697 desc.Quality = 0;
3698
3699 const int s = effectiveSampleCount(sampleCount);
3700
3701 if (s > 1) {
3702 D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msaaInfo = {};
3703 msaaInfo.Format = format;
3704 msaaInfo.SampleCount = UINT(s);
3705 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msaaInfo, sizeof(msaaInfo)))) {
3706 if (msaaInfo.NumQualityLevels > 0) {
3707 desc.Count = UINT(s);
3708 desc.Quality = msaaInfo.NumQualityLevels - 1;
3709 } else {
3710 qWarning("No quality levels for multisampling with sample count %d", s);
3711 }
3712 }
3713 }
3714
3715 return desc;
3716}
3717
3718bool QRhiD3D12::startCommandListForCurrentFrameSlot(D3D12GraphicsCommandList **cmdList)
3719{
3720 ID3D12CommandAllocator *cmdAlloc = cmdAllocators[currentFrameSlot];
3721 if (!*cmdList) {
3722 HRESULT hr = dev->CreateCommandList(0,
3723 D3D12_COMMAND_LIST_TYPE_DIRECT,
3724 cmdAlloc,
3725 nullptr,
3726 __uuidof(D3D12GraphicsCommandList),
3727 reinterpret_cast<void **>(cmdList));
3728 if (FAILED(hr)) {
3729 qWarning("Failed to create command list: %s", qPrintable(QSystemError::windowsComString(hr)));
3730 return false;
3731 }
3732 } else {
3733 HRESULT hr = (*cmdList)->Reset(cmdAlloc, nullptr);
3734 if (FAILED(hr)) {
3735 qWarning("Failed to reset command list: %s", qPrintable(QSystemError::windowsComString(hr)));
3736 return false;
3737 }
3738 }
3739 return true;
3740}
3741
3742static inline QRhiTexture::Format swapchainReadbackTextureFormat(DXGI_FORMAT format, QRhiTexture::Flags *flags)
3743{
3744 switch (format) {
3745 case DXGI_FORMAT_R8G8B8A8_UNORM:
3746 return QRhiTexture::RGBA8;
3747 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
3748 if (flags)
3749 (*flags) |= QRhiTexture::sRGB;
3750 return QRhiTexture::RGBA8;
3751 case DXGI_FORMAT_B8G8R8A8_UNORM:
3752 return QRhiTexture::BGRA8;
3753 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
3754 if (flags)
3755 (*flags) |= QRhiTexture::sRGB;
3756 return QRhiTexture::BGRA8;
3757 case DXGI_FORMAT_R16G16B16A16_FLOAT:
3758 return QRhiTexture::RGBA16F;
3759 case DXGI_FORMAT_R32G32B32A32_FLOAT:
3760 return QRhiTexture::RGBA32F;
3761 case DXGI_FORMAT_R10G10B10A2_UNORM:
3762 return QRhiTexture::RGB10A2;
3763 default:
3764 qWarning("DXGI_FORMAT %d cannot be read back", format);
3765 break;
3766 }
3767 return QRhiTexture::UnknownFormat;
3768}
3769
3770void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpdateBatch *resourceUpdates)
3771{
3772 QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates);
3773
3774 for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) {
3775 const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]);
3776 if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
3777 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3778 Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
3779 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
3780 if (u.offset == 0 && u.data.size() == bufD->m_size)
3781 bufD->pendingHostWrites[i].clear();
3782 bufD->pendingHostWrites[i].append({ u.offset, u.data });
3783 }
3784 } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
3785 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3786 Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
3787 Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
3788
3789 // The general approach to staging upload data is to first try
3790 // using the per-frame "small" staging area, which is a very simple
3791 // linear allocator; if that's not big enough then create a
3792 // dedicated StagingArea and then deferred-release it to make sure
3793 // if stays alive while the frame is possibly still in flight.
3794
3795 QD3D12StagingArea::Allocation stagingAlloc;
3796 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(bufD->m_size, 1);
3797 if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize)
3798 stagingAlloc = smallStagingAreas[currentFrameSlot].get(bufD->m_size);
3799
3800 std::optional<QD3D12StagingArea> ownStagingArea;
3801 if (!stagingAlloc.isValid()) {
3802 ownStagingArea = QD3D12StagingArea();
3803 if (!ownStagingArea->create(this, allocSize, D3D12_HEAP_TYPE_UPLOAD))
3804 continue;
3805 stagingAlloc = ownStagingArea->get(allocSize);
3806 if (!stagingAlloc.isValid()) {
3807 ownStagingArea->destroy();
3808 continue;
3809 }
3810 }
3811
3812 memcpy(stagingAlloc.p + u.offset, u.data.constData(), u.data.size());
3813
3814 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_COPY_DEST);
3815 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3816
3817 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
3818 cbD->cmdList->CopyBufferRegion(res->resource,
3819 u.offset,
3820 stagingAlloc.buffer,
3821 stagingAlloc.bufferOffset + u.offset,
3822 u.data.size());
3823 }
3824
3825 if (ownStagingArea.has_value())
3826 ownStagingArea->destroyWithDeferredRelease(&releaseQueue);
3827 } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
3828 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3829 if (bufD->m_type == QRhiBuffer::Dynamic) {
3830 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
3831 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[currentFrameSlot])) {
3832 Q_ASSERT(res->cpuMapPtr);
3833 u.result->data.resize(u.readSize);
3834 memcpy(u.result->data.data(), reinterpret_cast<char *>(res->cpuMapPtr) + u.offset, u.readSize);
3835 }
3836 if (u.result->completed)
3837 u.result->completed();
3838 } else {
3839 QD3D12Readback readback;
3840 readback.frameSlot = currentFrameSlot;
3841 readback.result = u.result;
3842 readback.byteSize = u.readSize;
3843 const quint32 allocSize = aligned(u.readSize, QD3D12StagingArea::ALIGNMENT);
3844 if (!readback.staging.create(this, allocSize, D3D12_HEAP_TYPE_READBACK)) {
3845 if (u.result->completed)
3846 u.result->completed();
3847 continue;
3848 }
3849 QD3D12StagingArea::Allocation stagingAlloc = readback.staging.get(u.readSize);
3850 if (!stagingAlloc.isValid()) {
3851 readback.staging.destroy();
3852 if (u.result->completed)
3853 u.result->completed();
3854 continue;
3855 }
3856 Q_ASSERT(stagingAlloc.bufferOffset == 0);
3857 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_COPY_SOURCE);
3858 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3859 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
3860 cbD->cmdList->CopyBufferRegion(stagingAlloc.buffer, 0, res->resource, u.offset, u.readSize);
3861 activeReadbacks.append(readback);
3862 } else {
3863 readback.staging.destroy();
3864 if (u.result->completed)
3865 u.result->completed();
3866 }
3867 }
3868 }
3869 }
3870
3871 for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
3872 const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]);
3873 if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) {
3874 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.dst);
3875 const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
3876 QD3D12Resource *res = resourcePool.lookupRef(texD->handle);
3877 if (!res)
3878 continue;
3879 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_COPY_DEST);
3880 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3881 for (int layer = 0, maxLayer = u.subresDesc.size(); layer < maxLayer; ++layer) {
3882 for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
3883 for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) {
3884 D3D12_SUBRESOURCE_FOOTPRINT footprint = {};
3885 footprint.Format = res->desc.Format;
3886 footprint.Depth = 1;
3887 quint32 totalBytes = 0;
3888
3889 const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize)
3890 : subresDesc.sourceSize();
3891 const QPoint srcPos = subresDesc.sourceTopLeft();
3892 QPoint dstPos = subresDesc.destinationTopLeft();
3893
3894 if (!subresDesc.image().isNull()) {
3895 const QImage img = subresDesc.image();
3896 const int bpl = img.bytesPerLine();
3897 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3898 totalBytes = footprint.RowPitch * img.height();
3899 } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) {
3900 QSize blockDim;
3901 quint32 bpl = 0;
3902 compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim);
3903 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3904 const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height();
3905 totalBytes = footprint.RowPitch * rowCount;
3906 } else if (!subresDesc.data().isEmpty()) {
3907 quint32 bpl = 0;
3908 if (subresDesc.dataStride())
3909 bpl = subresDesc.dataStride();
3910 else
3911 textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr);
3912 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3913 totalBytes = footprint.RowPitch * subresSize.height();
3914 } else {
3915 qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level);
3916 continue;
3917 }
3918
3919 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(totalBytes, 1);
3920 QD3D12StagingArea::Allocation stagingAlloc;
3921 if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize)
3922 stagingAlloc = smallStagingAreas[currentFrameSlot].get(allocSize);
3923
3924 std::optional<QD3D12StagingArea> ownStagingArea;
3925 if (!stagingAlloc.isValid()) {
3926 ownStagingArea = QD3D12StagingArea();
3927 if (!ownStagingArea->create(this, allocSize, D3D12_HEAP_TYPE_UPLOAD))
3928 continue;
3929 stagingAlloc = ownStagingArea->get(allocSize);
3930 if (!stagingAlloc.isValid()) {
3931 ownStagingArea->destroy();
3932 continue;
3933 }
3934 }
3935
3936 D3D12_TEXTURE_COPY_LOCATION dst;
3937 dst.pResource = res->resource;
3938 dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
3939 dst.SubresourceIndex = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount);
3940 D3D12_TEXTURE_COPY_LOCATION src;
3941 src.pResource = stagingAlloc.buffer;
3942 src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
3943 src.PlacedFootprint.Offset = stagingAlloc.bufferOffset;
3944
3945 D3D12_BOX srcBox; // back, right, bottom are exclusive
3946
3947 if (!subresDesc.image().isNull()) {
3948 const QImage img = subresDesc.image();
3949 const int bpc = qMax(1, img.depth() / 8);
3950 const int bpl = img.bytesPerLine();
3951
3952 QSize size = subresDesc.sourceSize().isEmpty() ? img.size() : subresDesc.sourceSize();
3953 size.setWidth(qMin(size.width(), img.width() - srcPos.x()));
3954 size.setHeight(qMin(size.height(), img.height() - srcPos.y()));
3955 size = clampedSubResourceUploadSize(size, dstPos, level, texD->m_pixelSize);
3956
3957 footprint.Width = size.width();
3958 footprint.Height = size.height();
3959
3960 srcBox.left = 0;
3961 srcBox.top = 0;
3962 srcBox.right = UINT(size.width());
3963 srcBox.bottom = UINT(size.height());
3964 srcBox.front = 0;
3965 srcBox.back = 1;
3966
3967 const uchar *imgPtr = img.constBits();
3968 const quint32 lineBytes = size.width() * bpc;
3969 for (int y = 0, h = size.height(); y < h; ++y) {
3970 memcpy(stagingAlloc.p + y * footprint.RowPitch,
3971 imgPtr + srcPos.x() * bpc + (y + srcPos.y()) * bpl,
3972 lineBytes);
3973 }
3974 } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) {
3975 QSize blockDim;
3976 quint32 bpl = 0;
3977 compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim);
3978 // x and y must be multiples of the block width and height
3979 dstPos.setX(aligned(dstPos.x(), blockDim.width()));
3980 dstPos.setY(aligned(dstPos.y(), blockDim.height()));
3981
3982 srcBox.left = 0;
3983 srcBox.top = 0;
3984 // width and height must be multiples of the block width and height
3985 srcBox.right = aligned(subresSize.width(), blockDim.width());
3986 srcBox.bottom = aligned(subresSize.height(), blockDim.height());
3987
3988 srcBox.front = 0;
3989 srcBox.back = 1;
3990
3991 footprint.Width = aligned(subresSize.width(), blockDim.width());
3992 footprint.Height = aligned(subresSize.height(), blockDim.height());
3993
3994 const quint32 copyBytes = qMin(bpl, footprint.RowPitch);
3995 const QByteArray imgData = subresDesc.data();
3996 const char *imgPtr = imgData.constData();
3997 const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height();
3998 for (int y = 0; y < rowCount; ++y)
3999 memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes);
4000 } else if (!subresDesc.data().isEmpty()) {
4001 srcBox.left = 0;
4002 srcBox.top = 0;
4003 srcBox.right = subresSize.width();
4004 srcBox.bottom = subresSize.height();
4005 srcBox.front = 0;
4006 srcBox.back = 1;
4007
4008 footprint.Width = subresSize.width();
4009 footprint.Height = subresSize.height();
4010
4011 quint32 bpl = 0;
4012 if (subresDesc.dataStride())
4013 bpl = subresDesc.dataStride();
4014 else
4015 textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr);
4016
4017 const quint32 copyBytes = qMin(bpl, footprint.RowPitch);
4018 const QByteArray data = subresDesc.data();
4019 const char *imgPtr = data.constData();
4020 for (int y = 0, h = subresSize.height(); y < h; ++y)
4021 memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes);
4022 }
4023
4024 src.PlacedFootprint.Footprint = footprint;
4025
4026 cbD->cmdList->CopyTextureRegion(&dst,
4027 UINT(dstPos.x()),
4028 UINT(dstPos.y()),
4029 is3D ? UINT(layer) : 0u,
4030 &src,
4031 &srcBox);
4032
4033 if (ownStagingArea.has_value())
4034 ownStagingArea->destroyWithDeferredRelease(&releaseQueue);
4035 }
4036 }
4037 }
4038 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Copy) {
4039 Q_ASSERT(u.src && u.dst);
4040 QD3D12Texture *srcD = QRHI_RES(QD3D12Texture, u.src);
4041 QD3D12Texture *dstD = QRHI_RES(QD3D12Texture, u.dst);
4042 const bool srcIs3D = srcD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4043 const bool dstIs3D = dstD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4044 QD3D12Resource *srcRes = resourcePool.lookupRef(srcD->handle);
4045 QD3D12Resource *dstRes = resourcePool.lookupRef(dstD->handle);
4046 if (!srcRes || !dstRes)
4047 continue;
4048
4049 barrierGen.addTransitionBarrier(srcD->handle, D3D12_RESOURCE_STATE_COPY_SOURCE);
4050 barrierGen.addTransitionBarrier(dstD->handle, D3D12_RESOURCE_STATE_COPY_DEST);
4051 barrierGen.enqueueBufferedTransitionBarriers(cbD);
4052
4053 const UINT srcSubresource = calcSubresource(UINT(u.desc.sourceLevel()),
4054 srcIs3D ? 0u : UINT(u.desc.sourceLayer()),
4055 srcD->mipLevelCount);
4056 const UINT dstSubresource = calcSubresource(UINT(u.desc.destinationLevel()),
4057 dstIs3D ? 0u : UINT(u.desc.destinationLayer()),
4058 dstD->mipLevelCount);
4059 const QPoint dp = u.desc.destinationTopLeft();
4060 const QSize mipSize = q->sizeForMipLevel(u.desc.sourceLevel(), srcD->m_pixelSize);
4061 const QSize copySize = u.desc.pixelSize().isEmpty() ? mipSize : u.desc.pixelSize();
4062 const QPoint sp = u.desc.sourceTopLeft();
4063
4064 D3D12_BOX srcBox;
4065 srcBox.left = UINT(sp.x());
4066 srcBox.top = UINT(sp.y());
4067 srcBox.front = srcIs3D ? UINT(u.desc.sourceLayer()) : 0u;
4068 // back, right, bottom are exclusive
4069 srcBox.right = srcBox.left + UINT(copySize.width());
4070 srcBox.bottom = srcBox.top + UINT(copySize.height());
4071 srcBox.back = srcBox.front + 1;
4072
4073 D3D12_TEXTURE_COPY_LOCATION src;
4074 src.pResource = srcRes->resource;
4075 src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4076 src.SubresourceIndex = srcSubresource;
4077 D3D12_TEXTURE_COPY_LOCATION dst;
4078 dst.pResource = dstRes->resource;
4079 dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4080 dst.SubresourceIndex = dstSubresource;
4081
4082 cbD->cmdList->CopyTextureRegion(&dst,
4083 UINT(dp.x()),
4084 UINT(dp.y()),
4085 dstIs3D ? UINT(u.desc.destinationLayer()) : 0u,
4086 &src,
4087 &srcBox);
4088 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Read) {
4089 QD3D12Readback readback;
4090 readback.frameSlot = currentFrameSlot;
4091 readback.result = u.result;
4092
4093 QD3D12ObjectHandle srcHandle;
4094 QRect rect;
4095 bool is3D = false;
4096 if (u.rb.texture()) {
4097 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.rb.texture());
4098 if (texD->sampleDesc.Count > 1) {
4099 qWarning("Multisample texture cannot be read back");
4100 continue;
4101 }
4102 is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4103 if (u.rb.rect().isValid())
4104 rect = u.rb.rect();
4105 else
4106 rect = QRect({0, 0}, q->sizeForMipLevel(u.rb.level(), texD->m_pixelSize));
4107 readback.format = texD->m_format;
4108 srcHandle = texD->handle;
4109 } else {
4110 Q_ASSERT(currentSwapChain);
4111 if (u.rb.rect().isValid())
4112 rect = u.rb.rect();
4113 else
4114 rect = QRect({0, 0}, currentSwapChain->pixelSize);
4115 readback.format = swapchainReadbackTextureFormat(currentSwapChain->colorFormat, nullptr);
4116 if (readback.format == QRhiTexture::UnknownFormat)
4117 continue;
4118 srcHandle = currentSwapChain->colorBuffers[currentSwapChain->currentBackBufferIndex];
4119 }
4120 readback.pixelSize = rect.size();
4121
4122 textureFormatInfo(readback.format,
4123 readback.pixelSize,
4124 &readback.bytesPerLine,
4125 &readback.byteSize,
4126 nullptr);
4127
4128 QD3D12Resource *srcRes = resourcePool.lookupRef(srcHandle);
4129 if (!srcRes)
4130 continue;
4131
4132 const UINT subresource = calcSubresource(UINT(u.rb.level()),
4133 is3D ? 0u : UINT(u.rb.layer()),
4134 srcRes->desc.MipLevels);
4135 D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
4136 // totalBytes is what we get from D3D, with the 256 aligned stride,
4137 // readback.byteSize is the final result that's not relevant here yet
4138 UINT64 totalBytes = 0;
4139 dev->GetCopyableFootprints(&srcRes->desc, subresource, 1, 0,
4140 &layout, nullptr, nullptr, &totalBytes);
4141 readback.stagingRowPitch = layout.Footprint.RowPitch;
4142
4143 const quint32 allocSize = aligned<quint32>(totalBytes, QD3D12StagingArea::ALIGNMENT);
4144 if (!readback.staging.create(this, allocSize, D3D12_HEAP_TYPE_READBACK)) {
4145 if (u.result->completed)
4146 u.result->completed();
4147 continue;
4148 }
4149 QD3D12StagingArea::Allocation stagingAlloc = readback.staging.get(totalBytes);
4150 if (!stagingAlloc.isValid()) {
4151 readback.staging.destroy();
4152 if (u.result->completed)
4153 u.result->completed();
4154 continue;
4155 }
4156 Q_ASSERT(stagingAlloc.bufferOffset == 0);
4157
4158 barrierGen.addTransitionBarrier(srcHandle, D3D12_RESOURCE_STATE_COPY_SOURCE);
4159 barrierGen.enqueueBufferedTransitionBarriers(cbD);
4160
4161 D3D12_TEXTURE_COPY_LOCATION dst;
4162 dst.pResource = stagingAlloc.buffer;
4163 dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4164 dst.PlacedFootprint.Offset = 0;
4165 dst.PlacedFootprint.Footprint = layout.Footprint;
4166
4167 D3D12_TEXTURE_COPY_LOCATION src;
4168 src.pResource = srcRes->resource;
4169 src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4170 src.SubresourceIndex = subresource;
4171
4172 D3D12_BOX srcBox = {};
4173 srcBox.left = UINT(rect.left());
4174 srcBox.top = UINT(rect.top());
4175 srcBox.front = is3D ? UINT(u.rb.layer()) : 0u;
4176 // back, right, bottom are exclusive
4177 srcBox.right = srcBox.left + UINT(rect.width());
4178 srcBox.bottom = srcBox.top + UINT(rect.height());
4179 srcBox.back = srcBox.front + 1;
4180
4181 cbD->cmdList->CopyTextureRegion(&dst, 0, 0, 0, &src, &srcBox);
4182 activeReadbacks.append(readback);
4183 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::GenMips) {
4184 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.dst);
4185 Q_ASSERT(texD->flags().testFlag(QRhiTexture::UsedWithGenerateMips));
4186 if (texD->flags().testFlag(QRhiTexture::ThreeDimensional))
4187 mipmapGen3D.generate(cbD, texD->handle);
4188 else
4189 mipmapGen.generate(cbD, texD->handle);
4190 }
4191 }
4192
4193 ud->free();
4194}
4195
4196void QRhiD3D12::finishActiveReadbacks(bool forced)
4197{
4198 QVarLengthArray<std::function<void()>, 4> completedCallbacks;
4199
4200 for (int i = activeReadbacks.size() - 1; i >= 0; --i) {
4201 QD3D12Readback &readback(activeReadbacks[i]);
4202 if (forced || currentFrameSlot == readback.frameSlot || readback.frameSlot < 0) {
4203 readback.result->format = readback.format;
4204 readback.result->pixelSize = readback.pixelSize;
4205 readback.result->data.resize(int(readback.byteSize));
4206
4207 if (readback.format != QRhiTexture::UnknownFormat) {
4208 quint8 *dstPtr = reinterpret_cast<quint8 *>(readback.result->data.data());
4209 const quint8 *srcPtr = readback.staging.mem.p;
4210 const quint32 lineSize = qMin(readback.bytesPerLine, readback.stagingRowPitch);
4211 for (int y = 0, h = readback.pixelSize.height(); y < h; ++y)
4212 memcpy(dstPtr + y * readback.bytesPerLine, srcPtr + y * readback.stagingRowPitch, lineSize);
4213 } else {
4214 memcpy(readback.result->data.data(), readback.staging.mem.p, readback.byteSize);
4215 }
4216
4217 readback.staging.destroy();
4218
4219 if (readback.result->completed)
4220 completedCallbacks.append(readback.result->completed);
4221
4222 activeReadbacks.remove(i);
4223 }
4224 }
4225
4226 for (auto f : completedCallbacks)
4227 f();
4228}
4229
4230bool QRhiD3D12::ensureShaderVisibleDescriptorHeapCapacity(QD3D12ShaderVisibleDescriptorHeap *h,
4231 D3D12_DESCRIPTOR_HEAP_TYPE type,
4232 int frameSlot,
4233 quint32 neededDescriptorCount,
4234 bool *gotNew)
4235{
4236 // Gets a new heap if needed. Note that the capacity we get is clamped
4237 // automatically (e.g. to 1 million, or 2048 for samplers), so * 2 does not
4238 // mean we can grow indefinitely, then again even using the same size would
4239 // work (because we what we are after here is a new heap for the rest of
4240 // the commands, not affecting what's already recorded).
4241 if (h->perFrameHeapSlice[frameSlot].remainingCapacity() < neededDescriptorCount) {
4242 const quint32 newPerFrameSize = qMax(h->perFrameHeapSlice[frameSlot].capacity * 2,
4243 neededDescriptorCount);
4244 QD3D12ShaderVisibleDescriptorHeap newHeap;
4245 if (!newHeap.create(dev, type, newPerFrameSize)) {
4246 qWarning("Could not create new shader-visible descriptor heap");
4247 return false;
4248 }
4249 h->destroyWithDeferredRelease(&releaseQueue);
4250 *h = newHeap;
4251 *gotNew = true;
4252 }
4253 return true;
4254}
4255
4256void QRhiD3D12::bindShaderVisibleHeaps(QD3D12CommandBuffer *cbD)
4257{
4258 ID3D12DescriptorHeap *heaps[] = {
4259 shaderVisibleCbvSrvUavHeap.heap.heap,
4260 samplerMgr.shaderVisibleSamplerHeap.heap.heap
4261 };
4262 cbD->cmdList->SetDescriptorHeaps(2, heaps);
4263}
4264
4265QD3D12Buffer::QD3D12Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size)
4266 : QRhiBuffer(rhi, type, usage, size)
4267{
4268}
4269
4270QD3D12Buffer::~QD3D12Buffer()
4271{
4272 destroy();
4273}
4274
4275void QD3D12Buffer::destroy()
4276{
4277 if (handles[0].isNull())
4278 return;
4279
4280 QRHI_RES_RHI(QRhiD3D12);
4281
4282 // destroy() implementations, unlike other functions, are expected to test
4283 // for m_rhi (rhiD) being null, to allow surviving in case one attempts to
4284 // destroy a (leaked) resource after the QRhi.
4285 //
4286 // If there is no QRhi anymore, we do not deferred-release but that's fine
4287 // since the QRhi already released everything that was in the resourcePool.
4288
4289 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4290 if (rhiD)
4291 rhiD->releaseQueue.deferredReleaseResource(handles[i]);
4292 handles[i] = {};
4293 pendingHostWrites[i].clear();
4294 }
4295
4296 if (rhiD)
4297 rhiD->unregisterResource(this);
4298}
4299
4300bool QD3D12Buffer::create()
4301{
4302 if (!handles[0].isNull())
4303 destroy();
4304
4305 if (m_usage.testFlag(QRhiBuffer::UniformBuffer) && m_type != Dynamic) {
4306 qWarning("UniformBuffer must always be Dynamic");
4307 return false;
4308 }
4309
4310 if (m_usage.testFlag(QRhiBuffer::StorageBuffer) && m_type == Dynamic) {
4311 qWarning("StorageBuffer cannot be combined with Dynamic");
4312 return false;
4313 }
4314
4315 const quint32 nonZeroSize = m_size <= 0 ? 256 : m_size;
4316 const quint32 roundedSize = aligned(nonZeroSize, m_usage.testFlag(QRhiBuffer::UniformBuffer) ? 256u : 4u);
4317
4318 UINT resourceFlags = D3D12_RESOURCE_FLAG_NONE;
4319 if (m_usage.testFlag(QRhiBuffer::StorageBuffer))
4320 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
4321
4322 QRHI_RES_RHI(QRhiD3D12);
4323 HRESULT hr = 0;
4324 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4325 if (i == 0 || m_type == Dynamic) {
4326 D3D12_RESOURCE_DESC resourceDesc = {};
4327 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
4328 resourceDesc.Width = roundedSize;
4329 resourceDesc.Height = 1;
4330 resourceDesc.DepthOrArraySize = 1;
4331 resourceDesc.MipLevels = 1;
4332 resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
4333 resourceDesc.SampleDesc = { 1, 0 };
4334 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
4335 resourceDesc.Flags = D3D12_RESOURCE_FLAGS(resourceFlags);
4336 ID3D12Resource *resource = nullptr;
4337 D3D12MA::Allocation *allocation = nullptr;
4338 // Dynamic == host (CPU) visible
4339 D3D12_HEAP_TYPE heapType = m_type == Dynamic
4340 ? D3D12_HEAP_TYPE_UPLOAD
4341 : D3D12_HEAP_TYPE_DEFAULT;
4342 D3D12_RESOURCE_STATES resourceState = m_type == Dynamic
4343 ? D3D12_RESOURCE_STATE_GENERIC_READ
4344 : D3D12_RESOURCE_STATE_COMMON;
4345 hr = rhiD->vma.createResource(heapType,
4346 &resourceDesc,
4347 resourceState,
4348 nullptr,
4349 &allocation,
4350 __uuidof(resource),
4351 reinterpret_cast<void **>(&resource));
4352 if (FAILED(hr))
4353 break;
4354 if (!m_objectName.isEmpty()) {
4355 QString decoratedName = QString::fromUtf8(m_objectName);
4356 if (m_type == Dynamic) {
4357 decoratedName += QLatin1Char('/');
4358 decoratedName += QString::number(i);
4359 }
4360 resource->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16()));
4361 }
4362 void *cpuMemPtr = nullptr;
4363 if (m_type == Dynamic) {
4364 // will be mapped for ever on the CPU, this makes future host write operations very simple
4365 hr = resource->Map(0, nullptr, &cpuMemPtr);
4366 if (FAILED(hr)) {
4367 qWarning("Map() failed to dynamic buffer");
4368 resource->Release();
4369 if (allocation)
4370 allocation->Release();
4371 break;
4372 }
4373 }
4374 handles[i] = QD3D12Resource::addToPool(&rhiD->resourcePool,
4375 resource,
4376 resourceState,
4377 allocation,
4378 cpuMemPtr);
4379 }
4380 }
4381 if (FAILED(hr)) {
4382 qWarning("Failed to create buffer: '%s' Type was %d, size was %u, using D3D12MA was %d.",
4383 qPrintable(QSystemError::windowsComString(hr)),
4384 int(m_type),
4385 roundedSize,
4386 int(rhiD->vma.isUsingD3D12MA()));
4387 return false;
4388 }
4389
4390 rhiD->registerResource(this);
4391 return true;
4392}
4393
4394QRhiBuffer::NativeBuffer QD3D12Buffer::nativeBuffer()
4395{
4396 NativeBuffer b;
4397 Q_ASSERT(sizeof(b.objects) / sizeof(b.objects[0]) >= size_t(QD3D12_FRAMES_IN_FLIGHT));
4398 QRHI_RES_RHI(QRhiD3D12);
4399 if (m_type == Dynamic) {
4400 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4401 executeHostWritesForFrameSlot(i);
4402 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[i]))
4403 b.objects[i] = res->resource;
4404 else
4405 b.objects[i] = nullptr;
4406 }
4407 b.slotCount = QD3D12_FRAMES_IN_FLIGHT;
4408 return b;
4409 }
4410 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[0]))
4411 b.objects[0] = res->resource;
4412 else
4413 b.objects[0] = nullptr;
4414 b.slotCount = 1;
4415 return b;
4416}
4417
4418char *QD3D12Buffer::beginFullDynamicBufferUpdateForCurrentFrame()
4419{
4420 // Shortcut the entire buffer update mechanism and allow the client to do
4421 // the host writes directly to the buffer. This will lead to unexpected
4422 // results when combined with QRhiResourceUpdateBatch-based updates for the
4423 // buffer, but provides a fast path for dynamic buffers that have all their
4424 // content changed in every frame.
4425
4426 Q_ASSERT(m_type == Dynamic);
4427 QRHI_RES_RHI(QRhiD3D12);
4428 Q_ASSERT(rhiD->inFrame);
4429 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[rhiD->currentFrameSlot]))
4430 return static_cast<char *>(res->cpuMapPtr);
4431
4432 return nullptr;
4433}
4434
4435void QD3D12Buffer::endFullDynamicBufferUpdateForCurrentFrame()
4436{
4437 // nothing to do here
4438}
4439
4440void QD3D12Buffer::executeHostWritesForFrameSlot(int frameSlot)
4441{
4442 if (pendingHostWrites[frameSlot].isEmpty())
4443 return;
4444
4445 Q_ASSERT(m_type == QRhiBuffer::Dynamic);
4446 QRHI_RES_RHI(QRhiD3D12);
4447 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[frameSlot])) {
4448 Q_ASSERT(res->cpuMapPtr);
4449 for (const QD3D12Buffer::HostWrite &u : std::as_const(pendingHostWrites[frameSlot]))
4450 memcpy(static_cast<char *>(res->cpuMapPtr) + u.offset, u.data.constData(), u.data.size());
4451 }
4452 pendingHostWrites[frameSlot].clear();
4453}
4454
4455static inline DXGI_FORMAT toD3DTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags)
4456{
4457 const bool srgb = flags.testFlag(QRhiTexture::sRGB);
4458 switch (format) {
4459 case QRhiTexture::RGBA8:
4460 return srgb ? DXGI_FORMAT_R8G8B8A8_UNORM_SRGB : DXGI_FORMAT_R8G8B8A8_UNORM;
4461 case QRhiTexture::BGRA8:
4462 return srgb ? DXGI_FORMAT_B8G8R8A8_UNORM_SRGB : DXGI_FORMAT_B8G8R8A8_UNORM;
4463 case QRhiTexture::R8:
4464 return DXGI_FORMAT_R8_UNORM;
4465 case QRhiTexture::R8SI:
4466 return DXGI_FORMAT_R8_SINT;
4467 case QRhiTexture::R8UI:
4468 return DXGI_FORMAT_R8_UINT;
4469 case QRhiTexture::RG8:
4470 return DXGI_FORMAT_R8G8_UNORM;
4471 case QRhiTexture::R16:
4472 return DXGI_FORMAT_R16_UNORM;
4473 case QRhiTexture::RG16:
4474 return DXGI_FORMAT_R16G16_UNORM;
4475 case QRhiTexture::RED_OR_ALPHA8:
4476 return DXGI_FORMAT_R8_UNORM;
4477
4478 case QRhiTexture::RGBA16F:
4479 return DXGI_FORMAT_R16G16B16A16_FLOAT;
4480 case QRhiTexture::RGBA32F:
4481 return DXGI_FORMAT_R32G32B32A32_FLOAT;
4482 case QRhiTexture::R16F:
4483 return DXGI_FORMAT_R16_FLOAT;
4484 case QRhiTexture::R32F:
4485 return DXGI_FORMAT_R32_FLOAT;
4486
4487 case QRhiTexture::RGB10A2:
4488 return DXGI_FORMAT_R10G10B10A2_UNORM;
4489
4490 case QRhiTexture::R32SI:
4491 return DXGI_FORMAT_R32_SINT;
4492 case QRhiTexture::R32UI:
4493 return DXGI_FORMAT_R32_UINT;
4494 case QRhiTexture::RG32SI:
4495 return DXGI_FORMAT_R32G32_SINT;
4496 case QRhiTexture::RG32UI:
4497 return DXGI_FORMAT_R32G32_UINT;
4498 case QRhiTexture::RGBA32SI:
4499 return DXGI_FORMAT_R32G32B32A32_SINT;
4500 case QRhiTexture::RGBA32UI:
4501 return DXGI_FORMAT_R32G32B32A32_UINT;
4502
4503 case QRhiTexture::D16:
4504 return DXGI_FORMAT_R16_TYPELESS;
4505 case QRhiTexture::D24:
4506 return DXGI_FORMAT_R24G8_TYPELESS;
4507 case QRhiTexture::D24S8:
4508 return DXGI_FORMAT_R24G8_TYPELESS;
4509 case QRhiTexture::D32F:
4510 return DXGI_FORMAT_R32_TYPELESS;
4511 case QRhiTexture::Format::D32FS8:
4512 return DXGI_FORMAT_R32G8X24_TYPELESS;
4513
4514 case QRhiTexture::BC1:
4515 return srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM;
4516 case QRhiTexture::BC2:
4517 return srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM;
4518 case QRhiTexture::BC3:
4519 return srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM;
4520 case QRhiTexture::BC4:
4521 return DXGI_FORMAT_BC4_UNORM;
4522 case QRhiTexture::BC5:
4523 return DXGI_FORMAT_BC5_UNORM;
4524 case QRhiTexture::BC6H:
4525 return DXGI_FORMAT_BC6H_UF16;
4526 case QRhiTexture::BC7:
4527 return srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM;
4528
4529 case QRhiTexture::ETC2_RGB8:
4530 case QRhiTexture::ETC2_RGB8A1:
4531 case QRhiTexture::ETC2_RGBA8:
4532 qWarning("QRhiD3D12 does not support ETC2 textures");
4533 return DXGI_FORMAT_R8G8B8A8_UNORM;
4534
4535 case QRhiTexture::ASTC_4x4:
4536 case QRhiTexture::ASTC_5x4:
4537 case QRhiTexture::ASTC_5x5:
4538 case QRhiTexture::ASTC_6x5:
4539 case QRhiTexture::ASTC_6x6:
4540 case QRhiTexture::ASTC_8x5:
4541 case QRhiTexture::ASTC_8x6:
4542 case QRhiTexture::ASTC_8x8:
4543 case QRhiTexture::ASTC_10x5:
4544 case QRhiTexture::ASTC_10x6:
4545 case QRhiTexture::ASTC_10x8:
4546 case QRhiTexture::ASTC_10x10:
4547 case QRhiTexture::ASTC_12x10:
4548 case QRhiTexture::ASTC_12x12:
4549 qWarning("QRhiD3D12 does not support ASTC textures");
4550 return DXGI_FORMAT_R8G8B8A8_UNORM;
4551
4552 default:
4553 break;
4554 }
4555 return DXGI_FORMAT_R8G8B8A8_UNORM;
4556}
4557
4558QD3D12RenderBuffer::QD3D12RenderBuffer(QRhiImplementation *rhi,
4559 Type type,
4560 const QSize &pixelSize,
4561 int sampleCount,
4562 Flags flags,
4563 QRhiTexture::Format backingFormatHint)
4564 : QRhiRenderBuffer(rhi, type, pixelSize, sampleCount, flags, backingFormatHint)
4565{
4566}
4567
4568QD3D12RenderBuffer::~QD3D12RenderBuffer()
4569{
4570 destroy();
4571}
4572
4573void QD3D12RenderBuffer::destroy()
4574{
4575 if (handle.isNull())
4576 return;
4577
4578 QRHI_RES_RHI(QRhiD3D12);
4579 if (rhiD) {
4580 if (rtv.isValid())
4581 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->rtvPool, rtv, 1);
4582 else if (dsv.isValid())
4583 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->dsvPool, dsv, 1);
4584 }
4585
4586 handle = {};
4587 rtv = {};
4588 dsv = {};
4589
4590 if (rhiD)
4591 rhiD->unregisterResource(this);
4592}
4593
4594bool QD3D12RenderBuffer::create()
4595{
4596 if (!handle.isNull())
4597 destroy();
4598
4599 if (m_pixelSize.isEmpty())
4600 return false;
4601
4602 QRHI_RES_RHI(QRhiD3D12);
4603
4604 switch (m_type) {
4605 case QRhiRenderBuffer::Color:
4606 {
4607 dxgiFormat = toD3DTextureFormat(backingFormat(), {});
4608 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4609 D3D12_RESOURCE_DESC resourceDesc = {};
4610 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
4611 resourceDesc.Width = UINT64(m_pixelSize.width());
4612 resourceDesc.Height = UINT(m_pixelSize.height());
4613 resourceDesc.DepthOrArraySize = 1;
4614 resourceDesc.MipLevels = 1;
4615 resourceDesc.Format = dxgiFormat;
4616 resourceDesc.SampleDesc = sampleDesc;
4617 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
4618 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
4619 D3D12_CLEAR_VALUE clearValue = {};
4620 clearValue.Format = dxgiFormat;
4621 // have a separate allocation and resource object (meaning both will need its own Release())
4622 ID3D12Resource *resource = nullptr;
4623 D3D12MA::Allocation *allocation = nullptr;
4624 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
4625 &resourceDesc,
4626 D3D12_RESOURCE_STATE_RENDER_TARGET,
4627 &clearValue,
4628 &allocation,
4629 __uuidof(ID3D12Resource),
4630 reinterpret_cast<void **>(&resource));
4631 if (FAILED(hr)) {
4632 qWarning("Failed to create color buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
4633 return false;
4634 }
4635 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_RENDER_TARGET, allocation);
4636 rtv = rhiD->rtvPool.allocate(1);
4637 if (!rtv.isValid())
4638 return false;
4639 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
4640 rtvDesc.Format = dxgiFormat;
4641 rtvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS
4642 : D3D12_RTV_DIMENSION_TEXTURE2D;
4643 rhiD->dev->CreateRenderTargetView(resource, &rtvDesc, rtv.cpuHandle);
4644 }
4645 break;
4646 case QRhiRenderBuffer::DepthStencil:
4647 {
4648 dxgiFormat = DS_FORMAT;
4649 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4650 D3D12_RESOURCE_DESC resourceDesc = {};
4651 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
4652 resourceDesc.Width = UINT64(m_pixelSize.width());
4653 resourceDesc.Height = UINT(m_pixelSize.height());
4654 resourceDesc.DepthOrArraySize = 1;
4655 resourceDesc.MipLevels = 1;
4656 resourceDesc.Format = dxgiFormat;
4657 resourceDesc.SampleDesc = sampleDesc;
4658 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
4659 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
4660 if (m_flags.testFlag(UsedWithSwapChainOnly))
4661 resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
4662 D3D12_CLEAR_VALUE clearValue = {};
4663 clearValue.Format = dxgiFormat;
4664 clearValue.DepthStencil.Depth = 1.0f;
4665 clearValue.DepthStencil.Stencil = 0;
4666 ID3D12Resource *resource = nullptr;
4667 D3D12MA::Allocation *allocation = nullptr;
4668 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
4669 &resourceDesc,
4670 D3D12_RESOURCE_STATE_DEPTH_WRITE,
4671 &clearValue,
4672 &allocation,
4673 __uuidof(ID3D12Resource),
4674 reinterpret_cast<void **>(&resource));
4675 if (FAILED(hr)) {
4676 qWarning("Failed to create depth-stencil buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
4677 return false;
4678 }
4679 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_DEPTH_WRITE, allocation);
4680 dsv = rhiD->dsvPool.allocate(1);
4681 if (!dsv.isValid())
4682 return false;
4683 D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {};
4684 dsvDesc.Format = dxgiFormat;
4685 dsvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS
4686 : D3D12_DSV_DIMENSION_TEXTURE2D;
4687 rhiD->dev->CreateDepthStencilView(resource, &dsvDesc, dsv.cpuHandle);
4688 }
4689 break;
4690 }
4691
4692 if (!m_objectName.isEmpty()) {
4693 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle)) {
4694 const QString name = QString::fromUtf8(m_objectName);
4695 res->resource->SetName(reinterpret_cast<LPCWSTR>(name.utf16()));
4696 }
4697 }
4698
4699 generation += 1;
4700 rhiD->registerResource(this);
4701 return true;
4702}
4703
4704QRhiTexture::Format QD3D12RenderBuffer::backingFormat() const
4705{
4706 if (m_backingFormatHint != QRhiTexture::UnknownFormat)
4707 return m_backingFormatHint;
4708 else
4709 return m_type == Color ? QRhiTexture::RGBA8 : QRhiTexture::UnknownFormat;
4710}
4711
4712QD3D12Texture::QD3D12Texture(QRhiImplementation *rhi, Format format, const QSize &pixelSize, int depth,
4713 int arraySize, int sampleCount, Flags flags)
4714 : QRhiTexture(rhi, format, pixelSize, depth, arraySize, sampleCount, flags)
4715{
4716}
4717
4718QD3D12Texture::~QD3D12Texture()
4719{
4720 destroy();
4721}
4722
4723void QD3D12Texture::destroy()
4724{
4725 if (handle.isNull())
4726 return;
4727
4728 QRHI_RES_RHI(QRhiD3D12);
4729 if (rhiD)
4730 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->cbvSrvUavPool, srv, 1);
4731
4732 handle = {};
4733 srv = {};
4734
4735 if (rhiD)
4736 rhiD->unregisterResource(this);
4737}
4738
4739static inline DXGI_FORMAT toD3DDepthTextureSRVFormat(QRhiTexture::Format format)
4740{
4741 switch (format) {
4742 case QRhiTexture::Format::D16:
4743 return DXGI_FORMAT_R16_FLOAT;
4744 case QRhiTexture::Format::D24:
4745 return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
4746 case QRhiTexture::Format::D24S8:
4747 return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
4748 case QRhiTexture::Format::D32F:
4749 return DXGI_FORMAT_R32_FLOAT;
4750 case QRhiTexture::Format::D32FS8:
4751 return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
4752 default:
4753 break;
4754 }
4755 Q_UNREACHABLE_RETURN(DXGI_FORMAT_R32_FLOAT);
4756}
4757
4758static inline DXGI_FORMAT toD3DDepthTextureDSVFormat(QRhiTexture::Format format)
4759{
4760 // here the result cannot be typeless
4761 switch (format) {
4762 case QRhiTexture::Format::D16:
4763 return DXGI_FORMAT_D16_UNORM;
4764 case QRhiTexture::Format::D24:
4765 return DXGI_FORMAT_D24_UNORM_S8_UINT;
4766 case QRhiTexture::Format::D24S8:
4767 return DXGI_FORMAT_D24_UNORM_S8_UINT;
4768 case QRhiTexture::Format::D32F:
4769 return DXGI_FORMAT_D32_FLOAT;
4770 case QRhiTexture::Format::D32FS8:
4771 return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
4772 default:
4773 break;
4774 }
4775 Q_UNREACHABLE_RETURN(DXGI_FORMAT_D32_FLOAT);
4776}
4777
4778static inline bool isDepthTextureFormat(QRhiTexture::Format format)
4779{
4780 switch (format) {
4781 case QRhiTexture::Format::D16:
4782 case QRhiTexture::Format::D24:
4783 case QRhiTexture::Format::D24S8:
4784 case QRhiTexture::Format::D32F:
4785 case QRhiTexture::Format::D32FS8:
4786 return true;
4787 default:
4788 return false;
4789 }
4790}
4791
4792bool QD3D12Texture::prepareCreate(QSize *adjustedSize)
4793{
4794 if (!handle.isNull())
4795 destroy();
4796
4797 QRHI_RES_RHI(QRhiD3D12);
4798 if (!rhiD->isTextureFormatSupported(m_format, m_flags))
4799 return false;
4800
4801 const bool isDepth = isDepthTextureFormat(m_format);
4802 const bool isCube = m_flags.testFlag(CubeMap);
4803 const bool is3D = m_flags.testFlag(ThreeDimensional);
4804 const bool isArray = m_flags.testFlag(TextureArray);
4805 const bool hasMipMaps = m_flags.testFlag(MipMapped);
4806 const bool is1D = m_flags.testFlag(OneDimensional);
4807
4808 const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1)
4809 : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize);
4810
4811 dxgiFormat = toD3DTextureFormat(m_format, m_flags);
4812 if (isDepth) {
4813 srvFormat = toD3DDepthTextureSRVFormat(m_format);
4814 rtFormat = toD3DDepthTextureDSVFormat(m_format);
4815 } else {
4816 srvFormat = dxgiFormat;
4817 rtFormat = dxgiFormat;
4818 }
4819 if (m_writeViewFormat.format != UnknownFormat) {
4820 if (isDepth)
4821 rtFormat = toD3DDepthTextureDSVFormat(m_writeViewFormat.format);
4822 else
4823 rtFormat = toD3DTextureFormat(m_writeViewFormat.format, m_writeViewFormat.srgb ? sRGB : Flags());
4824 }
4825 if (m_readViewFormat.format != UnknownFormat) {
4826 if (isDepth)
4827 srvFormat = toD3DDepthTextureSRVFormat(m_readViewFormat.format);
4828 else
4829 srvFormat = toD3DTextureFormat(m_readViewFormat.format, m_readViewFormat.srgb ? sRGB : Flags());
4830 }
4831
4832 mipLevelCount = uint(hasMipMaps ? rhiD->q->mipLevelsForSize(size) : 1);
4833 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4834 if (sampleDesc.Count > 1) {
4835 if (isCube) {
4836 qWarning("Cubemap texture cannot be multisample");
4837 return false;
4838 }
4839 if (is3D) {
4840 qWarning("3D texture cannot be multisample");
4841 return false;
4842 }
4843 if (hasMipMaps) {
4844 qWarning("Multisample texture cannot have mipmaps");
4845 return false;
4846 }
4847 }
4848 if (isDepth && hasMipMaps) {
4849 qWarning("Depth texture cannot have mipmaps");
4850 return false;
4851 }
4852 if (isCube && is3D) {
4853 qWarning("Texture cannot be both cube and 3D");
4854 return false;
4855 }
4856 if (isArray && is3D) {
4857 qWarning("Texture cannot be both array and 3D");
4858 return false;
4859 }
4860 if (isCube && is1D) {
4861 qWarning("Texture cannot be both cube and 1D");
4862 return false;
4863 }
4864 if (is1D && is3D) {
4865 qWarning("Texture cannot be both 1D and 3D");
4866 return false;
4867 }
4868 if (m_depth > 1 && !is3D) {
4869 qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth);
4870 return false;
4871 }
4872 if (m_arraySize > 0 && !isArray) {
4873 qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize);
4874 return false;
4875 }
4876 if (m_arraySize < 1 && isArray) {
4877 qWarning("Texture is an array but array size is %d", m_arraySize);
4878 return false;
4879 }
4880
4881 if (adjustedSize)
4882 *adjustedSize = size;
4883
4884 return true;
4885}
4886
4887bool QD3D12Texture::finishCreate()
4888{
4889 QRHI_RES_RHI(QRhiD3D12);
4890 const bool isCube = m_flags.testFlag(CubeMap);
4891 const bool is3D = m_flags.testFlag(ThreeDimensional);
4892 const bool isArray = m_flags.testFlag(TextureArray);
4893 const bool is1D = m_flags.testFlag(OneDimensional);
4894
4895 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
4896 srvDesc.Format = srvFormat;
4897 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
4898
4899 if (isCube) {
4900 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
4901 srvDesc.TextureCube.MipLevels = mipLevelCount;
4902 } else {
4903 if (is1D) {
4904 if (isArray) {
4905 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
4906 srvDesc.Texture1DArray.MipLevels = mipLevelCount;
4907 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4908 srvDesc.Texture1DArray.FirstArraySlice = UINT(m_arrayRangeStart);
4909 srvDesc.Texture1DArray.ArraySize = UINT(m_arrayRangeLength);
4910 } else {
4911 srvDesc.Texture1DArray.FirstArraySlice = 0;
4912 srvDesc.Texture1DArray.ArraySize = UINT(qMax(0, m_arraySize));
4913 }
4914 } else {
4915 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
4916 srvDesc.Texture1D.MipLevels = mipLevelCount;
4917 }
4918 } else if (isArray) {
4919 if (sampleDesc.Count > 1) {
4920 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
4921 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4922 srvDesc.Texture2DMSArray.FirstArraySlice = UINT(m_arrayRangeStart);
4923 srvDesc.Texture2DMSArray.ArraySize = UINT(m_arrayRangeLength);
4924 } else {
4925 srvDesc.Texture2DMSArray.FirstArraySlice = 0;
4926 srvDesc.Texture2DMSArray.ArraySize = UINT(qMax(0, m_arraySize));
4927 }
4928 } else {
4929 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
4930 srvDesc.Texture2DArray.MipLevels = mipLevelCount;
4931 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4932 srvDesc.Texture2DArray.FirstArraySlice = UINT(m_arrayRangeStart);
4933 srvDesc.Texture2DArray.ArraySize = UINT(m_arrayRangeLength);
4934 } else {
4935 srvDesc.Texture2DArray.FirstArraySlice = 0;
4936 srvDesc.Texture2DArray.ArraySize = UINT(qMax(0, m_arraySize));
4937 }
4938 }
4939 } else {
4940 if (sampleDesc.Count > 1) {
4941 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
4942 } else if (is3D) {
4943 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
4944 srvDesc.Texture3D.MipLevels = mipLevelCount;
4945 } else {
4946 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
4947 srvDesc.Texture2D.MipLevels = mipLevelCount;
4948 }
4949 }
4950 }
4951
4952 srv = rhiD->cbvSrvUavPool.allocate(1);
4953 if (!srv.isValid())
4954 return false;
4955
4956 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle)) {
4957 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
4958 if (!m_objectName.isEmpty()) {
4959 const QString name = QString::fromUtf8(m_objectName);
4960 res->resource->SetName(reinterpret_cast<LPCWSTR>(name.utf16()));
4961 }
4962 } else {
4963 return false;
4964 }
4965
4966 generation += 1;
4967 return true;
4968}
4969
4970bool QD3D12Texture::create()
4971{
4972 QSize size;
4973 if (!prepareCreate(&size))
4974 return false;
4975
4976 const bool isDepth = isDepthTextureFormat(m_format);
4977 const bool isCube = m_flags.testFlag(CubeMap);
4978 const bool is3D = m_flags.testFlag(ThreeDimensional);
4979 const bool isArray = m_flags.testFlag(TextureArray);
4980 const bool is1D = m_flags.testFlag(OneDimensional);
4981
4982 QRHI_RES_RHI(QRhiD3D12);
4983
4984 bool needsOptimizedClearValueSpecified = false;
4985 UINT resourceFlags = 0;
4986 if (m_flags.testFlag(RenderTarget) || sampleDesc.Count > 1) {
4987 if (isDepth)
4988 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
4989 else
4990 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
4991 needsOptimizedClearValueSpecified = true;
4992 }
4993 if (m_flags.testFlag(UsedWithGenerateMips)) {
4994 if (isDepth) {
4995 qWarning("Depth texture cannot have mipmaps generated");
4996 return false;
4997 }
4998 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
4999 }
5000 if (m_flags.testFlag(UsedWithLoadStore))
5001 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
5002
5003 D3D12_RESOURCE_DESC resourceDesc = {};
5004 resourceDesc.Dimension = is1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D
5005 : (is3D ? D3D12_RESOURCE_DIMENSION_TEXTURE3D
5006 : D3D12_RESOURCE_DIMENSION_TEXTURE2D);
5007 resourceDesc.Width = UINT64(size.width());
5008 resourceDesc.Height = UINT(size.height());
5009 resourceDesc.DepthOrArraySize = isCube ? 6
5010 : (isArray ? UINT(qMax(0, m_arraySize))
5011 : (is3D ? qMax(1, m_depth)
5012 : 1));
5013 resourceDesc.MipLevels = mipLevelCount;
5014 resourceDesc.Format = dxgiFormat;
5015 resourceDesc.SampleDesc = sampleDesc;
5016 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
5017 resourceDesc.Flags = D3D12_RESOURCE_FLAGS(resourceFlags);
5018 D3D12_CLEAR_VALUE clearValue = {};
5019 clearValue.Format = dxgiFormat;
5020 if (isDepth) {
5021 clearValue.Format = toD3DDepthTextureDSVFormat(m_format);
5022 clearValue.DepthStencil.Depth = 1.0f;
5023 clearValue.DepthStencil.Stencil = 0;
5024 }
5025 ID3D12Resource *resource = nullptr;
5026 D3D12MA::Allocation *allocation = nullptr;
5027 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
5028 &resourceDesc,
5029 D3D12_RESOURCE_STATE_COMMON,
5030 needsOptimizedClearValueSpecified ? &clearValue : nullptr,
5031 &allocation,
5032 __uuidof(ID3D12Resource),
5033 reinterpret_cast<void **>(&resource));
5034 if (FAILED(hr)) {
5035 qWarning("Failed to create texture: '%s'"
5036 " Dim was %d Size was %ux%u Depth/ArraySize was %u MipLevels was %u Format was %d Sample count was %d",
5037 qPrintable(QSystemError::windowsComString(hr)),
5038 int(resourceDesc.Dimension),
5039 uint(resourceDesc.Width),
5040 uint(resourceDesc.Height),
5041 uint(resourceDesc.DepthOrArraySize),
5042 uint(resourceDesc.MipLevels),
5043 int(resourceDesc.Format),
5044 int(resourceDesc.SampleDesc.Count));
5045 return false;
5046 }
5047
5048 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_COMMON, allocation);
5049
5050 if (!finishCreate())
5051 return false;
5052
5053 rhiD->registerResource(this);
5054 return true;
5055}
5056
5057bool QD3D12Texture::createFrom(QRhiTexture::NativeTexture src)
5058{
5059 if (!src.object)
5060 return false;
5061
5062 if (!prepareCreate())
5063 return false;
5064
5065 ID3D12Resource *resource = reinterpret_cast<ID3D12Resource *>(src.object);
5066 D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATES(src.layout);
5067
5068 QRHI_RES_RHI(QRhiD3D12);
5069 handle = QD3D12Resource::addNonOwningToPool(&rhiD->resourcePool, resource, state);
5070
5071 if (!finishCreate())
5072 return false;
5073
5074 rhiD->registerResource(this);
5075 return true;
5076}
5077
5078QRhiTexture::NativeTexture QD3D12Texture::nativeTexture()
5079{
5080 QRHI_RES_RHI(QRhiD3D12);
5081 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle))
5082 return { quint64(res->resource), int(res->state) };
5083
5084 return {};
5085}
5086
5087void QD3D12Texture::setNativeLayout(int layout)
5088{
5089 QRHI_RES_RHI(QRhiD3D12);
5090 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle))
5091 res->state = D3D12_RESOURCE_STATES(layout);
5092}
5093
5094QD3D12Sampler::QD3D12Sampler(QRhiImplementation *rhi, Filter magFilter, Filter minFilter, Filter mipmapMode,
5095 AddressMode u, AddressMode v, AddressMode w)
5096 : QRhiSampler(rhi, magFilter, minFilter, mipmapMode, u, v, w)
5097{
5098}
5099
5100QD3D12Sampler::~QD3D12Sampler()
5101{
5102 destroy();
5103}
5104
5105void QD3D12Sampler::destroy()
5106{
5107 shaderVisibleDescriptor = {};
5108
5109 QRHI_RES_RHI(QRhiD3D12);
5110 if (rhiD)
5111 rhiD->unregisterResource(this);
5112}
5113
5114static inline D3D12_FILTER toD3DFilter(QRhiSampler::Filter minFilter, QRhiSampler::Filter magFilter, QRhiSampler::Filter mipFilter)
5115{
5116 if (minFilter == QRhiSampler::Nearest) {
5117 if (magFilter == QRhiSampler::Nearest) {
5118 if (mipFilter == QRhiSampler::Linear)
5119 return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
5120 else
5121 return D3D12_FILTER_MIN_MAG_MIP_POINT;
5122 } else {
5123 if (mipFilter == QRhiSampler::Linear)
5124 return D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR;
5125 else
5126 return D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
5127 }
5128 } else {
5129 if (magFilter == QRhiSampler::Nearest) {
5130 if (mipFilter == QRhiSampler::Linear)
5131 return D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
5132 else
5133 return D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
5134 } else {
5135 if (mipFilter == QRhiSampler::Linear)
5136 return D3D12_FILTER_MIN_MAG_MIP_LINEAR;
5137 else
5138 return D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
5139 }
5140 }
5141 Q_UNREACHABLE_RETURN(D3D12_FILTER_MIN_MAG_MIP_LINEAR);
5142}
5143
5144static inline D3D12_TEXTURE_ADDRESS_MODE toD3DAddressMode(QRhiSampler::AddressMode m)
5145{
5146 switch (m) {
5147 case QRhiSampler::Repeat:
5148 return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
5149 case QRhiSampler::ClampToEdge:
5150 return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
5151 case QRhiSampler::Mirror:
5152 return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
5153 }
5154 Q_UNREACHABLE_RETURN(D3D12_TEXTURE_ADDRESS_MODE_CLAMP);
5155}
5156
5157static inline D3D12_COMPARISON_FUNC toD3DTextureComparisonFunc(QRhiSampler::CompareOp op)
5158{
5159 switch (op) {
5160 case QRhiSampler::Never:
5161 return D3D12_COMPARISON_FUNC_NEVER;
5162 case QRhiSampler::Less:
5163 return D3D12_COMPARISON_FUNC_LESS;
5164 case QRhiSampler::Equal:
5165 return D3D12_COMPARISON_FUNC_EQUAL;
5166 case QRhiSampler::LessOrEqual:
5167 return D3D12_COMPARISON_FUNC_LESS_EQUAL;
5168 case QRhiSampler::Greater:
5169 return D3D12_COMPARISON_FUNC_GREATER;
5170 case QRhiSampler::NotEqual:
5171 return D3D12_COMPARISON_FUNC_NOT_EQUAL;
5172 case QRhiSampler::GreaterOrEqual:
5173 return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
5174 case QRhiSampler::Always:
5175 return D3D12_COMPARISON_FUNC_ALWAYS;
5176 }
5177 Q_UNREACHABLE_RETURN(D3D12_COMPARISON_FUNC_NEVER);
5178}
5179
5180bool QD3D12Sampler::create()
5181{
5182 desc = {};
5183 desc.Filter = toD3DFilter(m_minFilter, m_magFilter, m_mipmapMode);
5184 if (m_compareOp != Never)
5185 desc.Filter = D3D12_FILTER(desc.Filter | 0x80);
5186 desc.AddressU = toD3DAddressMode(m_addressU);
5187 desc.AddressV = toD3DAddressMode(m_addressV);
5188 desc.AddressW = toD3DAddressMode(m_addressW);
5189 desc.MaxAnisotropy = 1.0f;
5190 desc.ComparisonFunc = toD3DTextureComparisonFunc(m_compareOp);
5191 desc.MaxLOD = m_mipmapMode == None ? 0.0f : 10000.0f;
5192
5193 QRHI_RES_RHI(QRhiD3D12);
5194 rhiD->registerResource(this, false);
5195 return true;
5196}
5197
5198QD3D12Descriptor QD3D12Sampler::lookupOrCreateShaderVisibleDescriptor()
5199{
5200 if (!shaderVisibleDescriptor.isValid()) {
5201 QRHI_RES_RHI(QRhiD3D12);
5202 shaderVisibleDescriptor = rhiD->samplerMgr.getShaderVisibleDescriptor(desc);
5203 }
5204 return shaderVisibleDescriptor;
5205}
5206
5207QD3D12ShadingRateMap::QD3D12ShadingRateMap(QRhiImplementation *rhi)
5208 : QRhiShadingRateMap(rhi)
5209{
5210}
5211
5212QD3D12ShadingRateMap::~QD3D12ShadingRateMap()
5213{
5214 destroy();
5215}
5216
5217void QD3D12ShadingRateMap::destroy()
5218{
5219 if (handle.isNull())
5220 return;
5221
5222 handle = {};
5223}
5224
5225bool QD3D12ShadingRateMap::createFrom(QRhiTexture *src)
5226{
5227 if (!handle.isNull())
5228 destroy();
5229
5230 handle = QRHI_RES(QD3D12Texture, src)->handle;
5231
5232 return true;
5233}
5234
5235QD3D12TextureRenderTarget::QD3D12TextureRenderTarget(QRhiImplementation *rhi,
5236 const QRhiTextureRenderTargetDescription &desc,
5237 Flags flags)
5238 : QRhiTextureRenderTarget(rhi, desc, flags),
5239 d(rhi)
5240{
5241}
5242
5243QD3D12TextureRenderTarget::~QD3D12TextureRenderTarget()
5244{
5245 destroy();
5246}
5247
5248void QD3D12TextureRenderTarget::destroy()
5249{
5250 if (!rtv[0].isValid() && !dsv.isValid())
5251 return;
5252
5253 QRHI_RES_RHI(QRhiD3D12);
5254 if (dsv.isValid()) {
5255 if (ownsDsv && rhiD)
5256 rhiD->releaseQueue.deferredReleaseViews(&rhiD->dsvPool, dsv, 1);
5257 dsv = {};
5258 }
5259
5260 for (int i = 0; i < QD3D12RenderTargetData::MAX_COLOR_ATTACHMENTS; ++i) {
5261 if (rtv[i].isValid()) {
5262 if (ownsRtv[i] && rhiD)
5263 rhiD->releaseQueue.deferredReleaseViews(&rhiD->rtvPool, rtv[i], 1);
5264 rtv[i] = {};
5265 }
5266 }
5267
5268 if (rhiD)
5269 rhiD->unregisterResource(this);
5270}
5271
5272QRhiRenderPassDescriptor *QD3D12TextureRenderTarget::newCompatibleRenderPassDescriptor()
5273{
5274 // not yet built so cannot rely on data computed in create()
5275
5276 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
5277
5278 rpD->colorAttachmentCount = 0;
5279 for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it) {
5280 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, it->texture());
5281 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, it->renderBuffer());
5282 if (texD)
5283 rpD->colorFormat[rpD->colorAttachmentCount] = texD->rtFormat;
5284 else if (rbD)
5285 rpD->colorFormat[rpD->colorAttachmentCount] = rbD->dxgiFormat;
5286 rpD->colorAttachmentCount += 1;
5287 }
5288
5289 rpD->hasDepthStencil = false;
5290 if (m_desc.depthStencilBuffer()) {
5291 rpD->hasDepthStencil = true;
5292 rpD->dsFormat = QD3D12RenderBuffer::DS_FORMAT;
5293 } else if (m_desc.depthTexture()) {
5294 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, m_desc.depthTexture());
5295 rpD->hasDepthStencil = true;
5296 rpD->dsFormat = toD3DDepthTextureDSVFormat(depthTexD->format()); // cannot be a typeless format
5297 }
5298
5299 rpD->hasShadingRateMap = m_desc.shadingRateMap() != nullptr;
5300
5301 rpD->updateSerializedFormat();
5302
5303 QRHI_RES_RHI(QRhiD3D12);
5304 rhiD->registerResource(rpD);
5305 return rpD;
5306}
5307
5308bool QD3D12TextureRenderTarget::create()
5309{
5310 if (rtv[0].isValid() || dsv.isValid())
5311 destroy();
5312
5313 QRHI_RES_RHI(QRhiD3D12);
5314 Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture());
5315 Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture());
5316 const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
5317 d.colorAttCount = 0;
5318 int attIndex = 0;
5319
5320 for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it, ++attIndex) {
5321 d.colorAttCount += 1;
5322 const QRhiColorAttachment &colorAtt(*it);
5323 QRhiTexture *texture = colorAtt.texture();
5324 QRhiRenderBuffer *rb = colorAtt.renderBuffer();
5325 Q_ASSERT(texture || rb);
5326 if (texture) {
5327 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, texture);
5328 QD3D12Resource *res = rhiD->resourcePool.lookupRef(texD->handle);
5329 if (!res) {
5330 qWarning("Could not look up texture handle for render target");
5331 return false;
5332 }
5333 const bool isMultiView = it->multiViewCount() >= 2;
5334 UINT layerCount = isMultiView ? UINT(it->multiViewCount()) : 1;
5335 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
5336 rtvDesc.Format = texD->rtFormat;
5337 if (texD->flags().testFlag(QRhiTexture::CubeMap)) {
5338 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
5339 rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level());
5340 rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer());
5341 rtvDesc.Texture2DArray.ArraySize = layerCount;
5342 } else if (texD->flags().testFlag(QRhiTexture::OneDimensional)) {
5343 if (texD->flags().testFlag(QRhiTexture::TextureArray)) {
5344 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY;
5345 rtvDesc.Texture1DArray.MipSlice = UINT(colorAtt.level());
5346 rtvDesc.Texture1DArray.FirstArraySlice = UINT(colorAtt.layer());
5347 rtvDesc.Texture1DArray.ArraySize = layerCount;
5348 } else {
5349 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D;
5350 rtvDesc.Texture1D.MipSlice = UINT(colorAtt.level());
5351 }
5352 } else if (texD->flags().testFlag(QRhiTexture::TextureArray)) {
5353 if (texD->sampleDesc.Count > 1) {
5354 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
5355 rtvDesc.Texture2DMSArray.FirstArraySlice = UINT(colorAtt.layer());
5356 rtvDesc.Texture2DMSArray.ArraySize = layerCount;
5357 } else {
5358 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
5359 rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level());
5360 rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer());
5361 rtvDesc.Texture2DArray.ArraySize = layerCount;
5362 }
5363 } else if (texD->flags().testFlag(QRhiTexture::ThreeDimensional)) {
5364 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;
5365 rtvDesc.Texture3D.MipSlice = UINT(colorAtt.level());
5366 rtvDesc.Texture3D.FirstWSlice = UINT(colorAtt.layer());
5367 rtvDesc.Texture3D.WSize = layerCount;
5368 } else {
5369 if (texD->sampleDesc.Count > 1) {
5370 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;
5371 } else {
5372 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
5373 rtvDesc.Texture2D.MipSlice = UINT(colorAtt.level());
5374 }
5375 }
5376 rtv[attIndex] = rhiD->rtvPool.allocate(1);
5377 if (!rtv[attIndex].isValid()) {
5378 qWarning("Failed to allocate RTV for texture render target");
5379 return false;
5380 }
5381 rhiD->dev->CreateRenderTargetView(res->resource, &rtvDesc, rtv[attIndex].cpuHandle);
5382 ownsRtv[attIndex] = true;
5383 if (attIndex == 0) {
5384 d.pixelSize = rhiD->q->sizeForMipLevel(colorAtt.level(), texD->pixelSize());
5385 d.sampleCount = int(texD->sampleDesc.Count);
5386 }
5387 } else if (rb) {
5388 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, rb);
5389 ownsRtv[attIndex] = false;
5390 rtv[attIndex] = rbD->rtv;
5391 if (attIndex == 0) {
5392 d.pixelSize = rbD->pixelSize();
5393 d.sampleCount = int(rbD->sampleDesc.Count);
5394 }
5395 }
5396 }
5397
5398 d.dpr = 1;
5399
5400 if (hasDepthStencil) {
5401 if (m_desc.depthTexture()) {
5402 ownsDsv = true;
5403 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, m_desc.depthTexture());
5404 QD3D12Resource *res = rhiD->resourcePool.lookupRef(depthTexD->handle);
5405 if (!res) {
5406 qWarning("Could not look up depth texture handle");
5407 return false;
5408 }
5409 D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {};
5410 dsvDesc.Format = depthTexD->rtFormat;
5411 const bool isMultisample = depthTexD->sampleDesc.Count > 1;
5412 if (depthTexD->flags().testFlag(QRhiTexture::TextureArray)) {
5413 if (isMultisample) {
5414 dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY;
5415 if (m_desc.depthLayer() >= 0) {
5416 dsvDesc.Texture2DMSArray.FirstArraySlice = UINT(m_desc.depthLayer());
5417 dsvDesc.Texture2DMSArray.ArraySize = 1;
5418 } else if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) {
5419 dsvDesc.Texture2DMSArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart());
5420 dsvDesc.Texture2DMSArray.ArraySize = UINT(depthTexD->arrayRangeLength());
5421 } else {
5422 dsvDesc.Texture2DMSArray.FirstArraySlice = 0;
5423 dsvDesc.Texture2DMSArray.ArraySize = UINT(qMax(0, depthTexD->arraySize()));
5424 }
5425 } else {
5426 dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
5427 if (m_desc.depthLayer() >= 0) {
5428 dsvDesc.Texture2DArray.FirstArraySlice = UINT(m_desc.depthLayer());
5429 dsvDesc.Texture2DArray.ArraySize = 1;
5430 } else if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) {
5431 dsvDesc.Texture2DArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart());
5432 dsvDesc.Texture2DArray.ArraySize = UINT(depthTexD->arrayRangeLength());
5433 } else {
5434 dsvDesc.Texture2DArray.FirstArraySlice = 0;
5435 dsvDesc.Texture2DArray.ArraySize = UINT(qMax(0, depthTexD->arraySize()));
5436 }
5437 }
5438 }
5439 else {
5440 dsvDesc.ViewDimension = isMultisample ? D3D12_DSV_DIMENSION_TEXTURE2DMS
5441 : D3D12_DSV_DIMENSION_TEXTURE2D;
5442 }
5443 dsv = rhiD->dsvPool.allocate(1);
5444 if (!dsv.isValid()) {
5445 qWarning("Failed to allocate DSV for texture render target");
5446 return false;
5447 }
5448 rhiD->dev->CreateDepthStencilView(res->resource, &dsvDesc, dsv.cpuHandle);
5449 if (d.colorAttCount == 0) {
5450 d.pixelSize = depthTexD->pixelSize();
5451 d.sampleCount = int(depthTexD->sampleDesc.Count);
5452 }
5453 } else {
5454 ownsDsv = false;
5455 QD3D12RenderBuffer *depthRbD = QRHI_RES(QD3D12RenderBuffer, m_desc.depthStencilBuffer());
5456 dsv = depthRbD->dsv;
5457 if (d.colorAttCount == 0) {
5458 d.pixelSize = m_desc.depthStencilBuffer()->pixelSize();
5459 d.sampleCount = int(depthRbD->sampleDesc.Count);
5460 }
5461 }
5462 d.dsAttCount = 1;
5463 } else {
5464 d.dsAttCount = 0;
5465 }
5466
5467 D3D12_CPU_DESCRIPTOR_HANDLE nullDescHandle = { 0 };
5468 for (int i = 0; i < QD3D12RenderTargetData::MAX_COLOR_ATTACHMENTS; ++i)
5469 d.rtv[i] = i < d.colorAttCount ? rtv[i].cpuHandle : nullDescHandle;
5470 d.dsv = dsv.cpuHandle;
5471 d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
5472
5473 QRhiRenderTargetAttachmentTracker::updateResIdList<QD3D12Texture, QD3D12RenderBuffer>(m_desc, &d.currentResIdList);
5474
5475 rhiD->registerResource(this);
5476 return true;
5477}
5478
5479QSize QD3D12TextureRenderTarget::pixelSize() const
5480{
5481 if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QD3D12Texture, QD3D12RenderBuffer>(m_desc, d.currentResIdList))
5482 const_cast<QD3D12TextureRenderTarget *>(this)->create();
5483
5484 return d.pixelSize;
5485}
5486
5487float QD3D12TextureRenderTarget::devicePixelRatio() const
5488{
5489 return d.dpr;
5490}
5491
5492int QD3D12TextureRenderTarget::sampleCount() const
5493{
5494 return d.sampleCount;
5495}
5496
5497QD3D12ShaderResourceBindings::QD3D12ShaderResourceBindings(QRhiImplementation *rhi)
5498 : QRhiShaderResourceBindings(rhi)
5499{
5500}
5501
5502QD3D12ShaderResourceBindings::~QD3D12ShaderResourceBindings()
5503{
5504 destroy();
5505}
5506
5507void QD3D12ShaderResourceBindings::destroy()
5508{
5509 QRHI_RES_RHI(QRhiD3D12);
5510 if (rhiD)
5511 rhiD->unregisterResource(this);
5512}
5513
5514bool QD3D12ShaderResourceBindings::create()
5515{
5516 QRHI_RES_RHI(QRhiD3D12);
5517 if (!rhiD->sanityCheckShaderResourceBindings(this))
5518 return false;
5519
5520 rhiD->updateLayoutDesc(this);
5521
5522 hasDynamicOffset = false;
5523 for (const QRhiShaderResourceBinding &b : std::as_const(m_bindings)) {
5524 const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b);
5525 if (bd->type == QRhiShaderResourceBinding::UniformBuffer && bd->u.ubuf.hasDynamicOffset) {
5526 hasDynamicOffset = true;
5527 break;
5528 }
5529 }
5530
5531 // The root signature is not part of the srb. Unintuitive, but the shader
5532 // translation pipeline ties our hands: as long as the per-shader (so per
5533 // stage!) nativeResourceBindingMap exist, meaning f.ex. that a SPIR-V
5534 // combined image sampler binding X passed in here may map to the tY and sY
5535 // HLSL registers, where Y is known only once the mapping table from the
5536 // shader is looked up. Creating a root parameters at this stage is
5537 // therefore impossible.
5538
5539 generation += 1;
5540 rhiD->registerResource(this, false);
5541 return true;
5542}
5543
5544void QD3D12ShaderResourceBindings::updateResources(UpdateFlags flags)
5545{
5546 Q_UNUSED(flags);
5547 generation += 1;
5548}
5549
// Accessing the QRhiBuffer/Texture/Sampler resources must be avoided in the
// callbacks; that would only be possible if the srb had those specified, and
// that is not required when the srb and the pipeline are created, and
// createRootSignature is called from the pipeline create().
5554
5555void QD3D12ShaderResourceBindings::visitUniformBuffer(QD3D12Stage s,
5556 const QRhiShaderResourceBinding::Data::UniformBufferData &,
5557 int shaderRegister,
5558 int)
5559{
5560 D3D12_ROOT_PARAMETER1 rootParam = {};
5561 rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
5562 rootParam.ShaderVisibility = qd3d12_stageToVisibility(s);
5563 rootParam.Descriptor.ShaderRegister = shaderRegister;
5564 rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
5565 visitorData.cbParams[s].append(rootParam);
5566}
5567
5568void QD3D12ShaderResourceBindings::visitTexture(QD3D12Stage s,
5569 const QRhiShaderResourceBinding::TextureAndSampler &,
5570 int shaderRegister)
5571{
5572 D3D12_DESCRIPTOR_RANGE1 range = {};
5573 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
5574 range.NumDescriptors = 1;
5575 range.BaseShaderRegister = shaderRegister;
5576 range.OffsetInDescriptorsFromTableStart = visitorData.currentSrvRangeOffset[s];
5577 visitorData.currentSrvRangeOffset[s] += 1;
5578 visitorData.srvRanges[s].append(range);
5579 if (visitorData.srvRanges[s].count() == 1) {
5580 visitorData.srvTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5581 visitorData.srvTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5582 }
5583}
5584
5585void QD3D12ShaderResourceBindings::visitSampler(QD3D12Stage s,
5586 const QRhiShaderResourceBinding::TextureAndSampler &,
5587 int shaderRegister)
5588{
5589 // Unlike SRVs and UAVs, samplers are handled so that each sampler becomes
5590 // a root parameter with its own descriptor table.
5591
5592 int &rangeStoreIdx(visitorData.samplerRangeHeads[s]);
5593 if (rangeStoreIdx == 16) {
5594 qWarning("Sampler count in QD3D12Stage %d exceeds the limit of 16, this is disallowed by QRhi", s);
5595 return;
5596 }
5597 D3D12_DESCRIPTOR_RANGE1 range = {};
5598 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
5599 range.NumDescriptors = 1;
5600 range.BaseShaderRegister = shaderRegister;
5601 visitorData.samplerRanges[s][rangeStoreIdx] = range;
5602 D3D12_ROOT_PARAMETER1 param = {};
5603 param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5604 param.ShaderVisibility = qd3d12_stageToVisibility(s);
5605 param.DescriptorTable.NumDescriptorRanges = 1;
5606 param.DescriptorTable.pDescriptorRanges = &visitorData.samplerRanges[s][rangeStoreIdx];
5607 rangeStoreIdx += 1;
5608 visitorData.samplerTables[s].append(param);
5609}
5610
5611void QD3D12ShaderResourceBindings::visitStorageBuffer(QD3D12Stage s,
5612 const QRhiShaderResourceBinding::Data::StorageBufferData &,
5613 QD3D12ShaderResourceVisitor::StorageOp,
5614 int shaderRegister)
5615{
5616 D3D12_DESCRIPTOR_RANGE1 range = {};
5617 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
5618 range.NumDescriptors = 1;
5619 range.BaseShaderRegister = shaderRegister;
5620 range.OffsetInDescriptorsFromTableStart = visitorData.currentUavRangeOffset[s];
5621 visitorData.currentUavRangeOffset[s] += 1;
5622 visitorData.uavRanges[s].append(range);
5623 if (visitorData.uavRanges[s].count() == 1) {
5624 visitorData.uavTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5625 visitorData.uavTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5626 }
5627}
5628
5629void QD3D12ShaderResourceBindings::visitStorageImage(QD3D12Stage s,
5630 const QRhiShaderResourceBinding::Data::StorageImageData &,
5631 QD3D12ShaderResourceVisitor::StorageOp,
5632 int shaderRegister)
5633{
5634 D3D12_DESCRIPTOR_RANGE1 range = {};
5635 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
5636 range.NumDescriptors = 1;
5637 range.BaseShaderRegister = shaderRegister;
5638 range.OffsetInDescriptorsFromTableStart = visitorData.currentUavRangeOffset[s];
5639 visitorData.currentUavRangeOffset[s] += 1;
5640 visitorData.uavRanges[s].append(range);
5641 if (visitorData.uavRanges[s].count() == 1) {
5642 visitorData.uavTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5643 visitorData.uavTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5644 }
5645}
5646
5647QD3D12ObjectHandle QD3D12ShaderResourceBindings::createRootSignature(const QD3D12ShaderStageData *stageData,
5648 int stageCount)
5649{
5650 QRHI_RES_RHI(QRhiD3D12);
5651
5652 // It's not just that the root signature has to be tied to the pipeline
5653 // (cannot just freely create it like e.g. with Vulkan where one just
5654 // creates a descriptor layout 1:1 with the QRhiShaderResourceBindings'
5655 // data), due to not knowing the shader-specific resource binding mapping
5656 // tables at the point of srb creation, but each shader stage may have a
5657 // different mapping table. (ugh!)
5658 //
5659 // Hence we set up everything per-stage, even if it means the root
5660 // signature gets unnecessarily big. (note that the magic is in the
5661 // ShaderVisibility: even though the register range is the same in the
5662 // descriptor tables, the visibility is different)
5663
5664 QD3D12ShaderResourceVisitor visitor(this, stageData, stageCount);
5665
5666 visitorData = {};
5667
5668 using namespace std::placeholders;
5669 visitor.uniformBuffer = std::bind(&QD3D12ShaderResourceBindings::visitUniformBuffer, this, _1, _2, _3, _4);
5670 visitor.texture = std::bind(&QD3D12ShaderResourceBindings::visitTexture, this, _1, _2, _3);
5671 visitor.sampler = std::bind(&QD3D12ShaderResourceBindings::visitSampler, this, _1, _2, _3);
5672 visitor.storageBuffer = std::bind(&QD3D12ShaderResourceBindings::visitStorageBuffer, this, _1, _2, _3, _4);
5673 visitor.storageImage = std::bind(&QD3D12ShaderResourceBindings::visitStorageImage, this, _1, _2, _3, _4);
5674
5675 visitor.visit();
5676
5677 // The maximum size of a root signature is 256 bytes, where a descriptor
5678 // table is 4, a root descriptor (e.g. CBV) is 8. We have 5 stages at most
5679 // (or 1 with compute) and a separate descriptor table for SRVs (->
5680 // textures) and UAVs (-> storage buffers and images) per stage, plus each
5681 // uniform buffer counts as a CBV in the stages it is visible.
5682 //
5683 // Due to the limited maximum size of a shader-visible sampler heap (2048)
5684 // and the potential costly switching of descriptor heaps, each sampler is
5685 // declared as a separate root parameter / descriptor table (meaning that
5686 // two samplers in the same stage are two parameters and two tables, not
5687 // just one). QRhi documents a hard limit of 16 on texture/sampler bindings
5688 // in a shader (matching D3D11), so we can hopefully get away with this.
5689 //
5690 // This means that e.g. a vertex+fragment shader with a uniform buffer
5691 // visible in both and one texture+sampler in the fragment shader would
5692 // consume 2*8 + 4 + 4 = 24 bytes. This also implies that clients
5693 // specifying the minimal stage bit mask for each entry in
5694 // QRhiShaderResourceBindings are ideal for this backend since it helps
5695 // reducing the chance of hitting the size limit.
5696
5697 QVarLengthArray<D3D12_ROOT_PARAMETER1, 4> rootParams;
5698 for (int s = 0; s < 6; ++s) {
5699 if (!visitorData.cbParams[s].isEmpty())
5700 rootParams.append(visitorData.cbParams[s].constData(), visitorData.cbParams[s].count());
5701 }
5702 for (int s = 0; s < 6; ++s) {
5703 if (!visitorData.srvRanges[s].isEmpty()) {
5704 visitorData.srvTables[s].DescriptorTable.NumDescriptorRanges = visitorData.srvRanges[s].count();
5705 visitorData.srvTables[s].DescriptorTable.pDescriptorRanges = visitorData.srvRanges[s].constData();
5706 rootParams.append(visitorData.srvTables[s]);
5707 }
5708 }
5709 for (int s = 0; s < 6; ++s) {
5710 if (!visitorData.samplerTables[s].isEmpty())
5711 rootParams.append(visitorData.samplerTables[s].constData(), visitorData.samplerTables[s].count());
5712 }
5713 for (int s = 0; s < 6; ++s) {
5714 if (!visitorData.uavRanges[s].isEmpty()) {
5715 visitorData.uavTables[s].DescriptorTable.NumDescriptorRanges = visitorData.uavRanges[s].count();
5716 visitorData.uavTables[s].DescriptorTable.pDescriptorRanges = visitorData.uavRanges[s].constData();
5717 rootParams.append(visitorData.uavTables[s]);
5718 }
5719 }
5720
5721 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
5722 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
5723 if (!rootParams.isEmpty()) {
5724 rsDesc.Desc_1_1.NumParameters = rootParams.count();
5725 rsDesc.Desc_1_1.pParameters = rootParams.constData();
5726 }
5727
5728 UINT rsFlags = 0;
5729 for (int stageIdx = 0; stageIdx < stageCount; ++stageIdx) {
5730 if (stageData[stageIdx].valid && stageData[stageIdx].stage == VS)
5731 rsFlags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
5732 }
5733 rsDesc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAGS(rsFlags);
5734
5735 ID3DBlob *signature = nullptr;
5736 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
5737 if (FAILED(hr)) {
5738 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
5739 return {};
5740 }
5741 ID3D12RootSignature *rootSig = nullptr;
5742 hr = rhiD->dev->CreateRootSignature(0,
5743 signature->GetBufferPointer(),
5744 signature->GetBufferSize(),
5745 __uuidof(ID3D12RootSignature),
5746 reinterpret_cast<void **>(&rootSig));
5747 signature->Release();
5748 if (FAILED(hr)) {
5749 qWarning("Failed to create root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
5750 return {};
5751 }
5752
5753 return QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
5754}
5755
5756// For shader model < 6.0 we do the same as the D3D11 backend: use the old
5757// compiler (D3DCompile) to generate DXBC, just as qsb does (when -c is passed)
5758// by invoking fxc, not dxc. For SM >= 6.0 we have to use the new compiler and
5759// work with DXIL. And that involves IDxcCompiler and needs the presence of
5760// dxcompiler.dll and dxil.dll at runtime. Plus there's a chance we have
5761// ancient SDK headers when not using MSVC. So this is heavily optional,
5762// meaning support for dxc can be disabled both at build time (no dxcapi.h) and
5763// at run time (no DLLs).
5764
// Writes an HLSL target profile string of the form "xx_M_m" into \a target:
// the two characters of \a stage, followed by the tens digit (major) and the
// ones digit (minor) of \a version, for example "vs" and 50 give "vs_5_0".
// \a target must have room for 7 characters including the terminator.
static inline void makeHlslTargetString(char target[7], const char stage[3], int version)
{
    const char majorDigit = char('0' + version / 10);
    const char minorDigit = char('0' + version % 10);
    const char assembled[7] = { stage[0], stage[1], '_', majorDigit, '_', minorDigit, '\0' };
    for (int i = 0; i < 7; ++i)
        target[i] = assembled[i];
}
5777
// Bit flags accepted by the runtime HLSL compilation helpers in this file.
enum class HlslCompileFlag
{
    // Request debug information from the compiler.
    WithDebugInfo = 1 << 0
};
5782
5783static QByteArray legacyCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error)
5784{
5785 static const pD3DCompile d3dCompile = QRhiD3D::resolveD3DCompile();
5786 if (!d3dCompile) {
5787 qWarning("Unable to resolve function D3DCompile()");
5788 return QByteArray();
5789 }
5790
5791 ID3DBlob *bytecode = nullptr;
5792 ID3DBlob *errors = nullptr;
5793 UINT d3dCompileFlags = 0;
5794 if (flags & int(HlslCompileFlag::WithDebugInfo))
5795 d3dCompileFlags |= D3DCOMPILE_DEBUG;
5796
5797 HRESULT hr = d3dCompile(hlslSource.shader().constData(), SIZE_T(hlslSource.shader().size()),
5798 nullptr, nullptr, nullptr,
5799 hlslSource.entryPoint().constData(), target, d3dCompileFlags, 0, &bytecode, &errors);
5800 if (FAILED(hr) || !bytecode) {
5801 qWarning("HLSL shader compilation failed: 0x%x", uint(hr));
5802 if (errors) {
5803 *error = QString::fromUtf8(static_cast<const char *>(errors->GetBufferPointer()),
5804 int(errors->GetBufferSize()));
5805 errors->Release();
5806 }
5807 return QByteArray();
5808 }
5809
5810 QByteArray result;
5811 result.resize(int(bytecode->GetBufferSize()));
5812 memcpy(result.data(), bytecode->GetBufferPointer(), size_t(result.size()));
5813 bytecode->Release();
5814 return result;
5815}
5816
5817#ifdef QRHI_D3D12_HAS_DXC
5818
5819#ifndef DXC_CP_UTF8
5820#define DXC_CP_UTF8 65001
5821#endif
5822
5823#ifndef DXC_ARG_DEBUG
5824#define DXC_ARG_DEBUG L"-Zi"
5825#endif
5826
5827static QByteArray dxcCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error)
5828{
5829 static std::pair<IDxcCompiler *, IDxcLibrary *> dxc = QRhiD3D::createDxcCompiler();
5830 IDxcCompiler *compiler = dxc.first;
5831 if (!compiler) {
5832 qWarning("Unable to instantiate IDxcCompiler. Likely no dxcompiler.dll and dxil.dll present. "
5833 "Use windeployqt or try https://github.com/microsoft/DirectXShaderCompiler/releases");
5834 return QByteArray();
5835 }
5836 IDxcLibrary *library = dxc.second;
5837 if (!library)
5838 return QByteArray();
5839
5840 IDxcBlobEncoding *sourceBlob = nullptr;
5841 HRESULT hr = library->CreateBlobWithEncodingOnHeapCopy(hlslSource.shader().constData(),
5842 UINT32(hlslSource.shader().size()),
5843 DXC_CP_UTF8,
5844 &sourceBlob);
5845 if (FAILED(hr)) {
5846 qWarning("Failed to create source blob for dxc: 0x%x (%s)",
5847 uint(hr),
5848 qPrintable(QSystemError::windowsComString(hr)));
5849 return QByteArray();
5850 }
5851
5852 const QString entryPointStr = QString::fromLatin1(hlslSource.entryPoint());
5853 const QString targetStr = QString::fromLatin1(target);
5854
5855 QVarLengthArray<LPCWSTR, 4> argPtrs;
5856 QString debugArg;
5857 if (flags & int(HlslCompileFlag::WithDebugInfo)) {
5858 debugArg = QString::fromUtf16(reinterpret_cast<const char16_t *>(DXC_ARG_DEBUG));
5859 argPtrs.append(reinterpret_cast<LPCWSTR>(debugArg.utf16()));
5860 }
5861
5862 IDxcOperationResult *result = nullptr;
5863 hr = compiler->Compile(sourceBlob,
5864 nullptr,
5865 reinterpret_cast<LPCWSTR>(entryPointStr.utf16()),
5866 reinterpret_cast<LPCWSTR>(targetStr.utf16()),
5867 argPtrs.data(), argPtrs.count(),
5868 nullptr, 0,
5869 nullptr,
5870 &result);
5871 sourceBlob->Release();
5872 if (SUCCEEDED(hr))
5873 result->GetStatus(&hr);
5874 if (FAILED(hr)) {
5875 qWarning("HLSL shader compilation failed: 0x%x (%s)",
5876 uint(hr),
5877 qPrintable(QSystemError::windowsComString(hr)));
5878 if (result) {
5879 IDxcBlobEncoding *errorsBlob = nullptr;
5880 if (SUCCEEDED(result->GetErrorBuffer(&errorsBlob))) {
5881 if (errorsBlob) {
5882 *error = QString::fromUtf8(static_cast<const char *>(errorsBlob->GetBufferPointer()),
5883 int(errorsBlob->GetBufferSize()));
5884 errorsBlob->Release();
5885 }
5886 }
5887 }
5888 return QByteArray();
5889 }
5890
5891 IDxcBlob *bytecode = nullptr;
5892 if FAILED(result->GetResult(&bytecode)) {
5893 qWarning("No result from IDxcCompiler: 0x%x (%s)",
5894 uint(hr),
5895 qPrintable(QSystemError::windowsComString(hr)));
5896 return QByteArray();
5897 }
5898
5899 QByteArray ba;
5900 ba.resize(int(bytecode->GetBufferSize()));
5901 memcpy(ba.data(), bytecode->GetBufferPointer(), size_t(ba.size()));
5902 bytecode->Release();
5903 return ba;
5904}
5905
5906#endif // QRHI_D3D12_HAS_DXC
5907
5908static QByteArray compileHlslShaderSource(const QShader &shader,
5909 QShader::Variant shaderVariant,
5910 int flags,
5911 QString *error,
5912 QShaderKey *usedShaderKey)
5913{
5914 // look for SM 6.7, 6.6, .., 5.0
5915 const int shaderModelMax = 67;
5916 for (int sm = shaderModelMax; sm >= 50; --sm) {
5917 for (QShader::Source type : { QShader::DxilShader, QShader::DxbcShader }) {
5918 QShaderKey key = { type, sm, shaderVariant };
5919 QShaderCode intermediateBytecodeShader = shader.shader(key);
5920 if (!intermediateBytecodeShader.shader().isEmpty()) {
5921 if (usedShaderKey)
5922 *usedShaderKey = key;
5923 return intermediateBytecodeShader.shader();
5924 }
5925 }
5926 }
5927
5928 QShaderCode hlslSource;
5929 QShaderKey key;
5930 for (int sm = shaderModelMax; sm >= 50; --sm) {
5931 key = { QShader::HlslShader, sm, shaderVariant };
5932 hlslSource = shader.shader(key);
5933 if (!hlslSource.shader().isEmpty())
5934 break;
5935 }
5936
5937 if (hlslSource.shader().isEmpty()) {
5938 qWarning() << "No HLSL (shader model 6.7..5.0) code found in baked shader" << shader;
5939 return QByteArray();
5940 }
5941
5942 if (usedShaderKey)
5943 *usedShaderKey = key;
5944
5945 char target[7];
5946 switch (shader.stage()) {
5947 case QShader::VertexStage:
5948 makeHlslTargetString(target, "vs", key.sourceVersion().version());
5949 break;
5950 case QShader::TessellationControlStage:
5951 makeHlslTargetString(target, "hs", key.sourceVersion().version());
5952 break;
5953 case QShader::TessellationEvaluationStage:
5954 makeHlslTargetString(target, "ds", key.sourceVersion().version());
5955 break;
5956 case QShader::GeometryStage:
5957 makeHlslTargetString(target, "gs", key.sourceVersion().version());
5958 break;
5959 case QShader::FragmentStage:
5960 makeHlslTargetString(target, "ps", key.sourceVersion().version());
5961 break;
5962 case QShader::ComputeStage:
5963 makeHlslTargetString(target, "cs", key.sourceVersion().version());
5964 break;
5965 }
5966
5967 if (key.sourceVersion().version() >= 60) {
5968#ifdef QRHI_D3D12_HAS_DXC
5969 return dxcCompile(hlslSource, target, flags, error);
5970#else
5971 qWarning("Attempted to runtime-compile HLSL source code for shader model >= 6.0 "
5972 "but the Qt build has no support for DXC. "
5973 "Rebuild Qt with a recent Windows SDK or switch to an MSVC build.");
5974#endif
5975 }
5976
5977 return legacyCompile(hlslSource, target, flags, error);
5978}
5979
5980static inline UINT8 toD3DColorWriteMask(QRhiGraphicsPipeline::ColorMask c)
5981{
5982 UINT8 f = 0;
5983 if (c.testFlag(QRhiGraphicsPipeline::R))
5984 f |= D3D12_COLOR_WRITE_ENABLE_RED;
5985 if (c.testFlag(QRhiGraphicsPipeline::G))
5986 f |= D3D12_COLOR_WRITE_ENABLE_GREEN;
5987 if (c.testFlag(QRhiGraphicsPipeline::B))
5988 f |= D3D12_COLOR_WRITE_ENABLE_BLUE;
5989 if (c.testFlag(QRhiGraphicsPipeline::A))
5990 f |= D3D12_COLOR_WRITE_ENABLE_ALPHA;
5991 return f;
5992}
5993
5994static inline D3D12_BLEND toD3DBlendFactor(QRhiGraphicsPipeline::BlendFactor f, bool rgb)
5995{
5996 // SrcBlendAlpha and DstBlendAlpha do not accept *_COLOR. With other APIs
5997 // this is handled internally (so that e.g. VK_BLEND_FACTOR_SRC_COLOR is
5998 // accepted and is in effect equivalent to VK_BLEND_FACTOR_SRC_ALPHA when
5999 // set as an alpha src/dest factor), but for D3D we have to take care of it
6000 // ourselves. Hence the rgb argument.
6001
6002 switch (f) {
6003 case QRhiGraphicsPipeline::Zero:
6004 return D3D12_BLEND_ZERO;
6005 case QRhiGraphicsPipeline::One:
6006 return D3D12_BLEND_ONE;
6007 case QRhiGraphicsPipeline::SrcColor:
6008 return rgb ? D3D12_BLEND_SRC_COLOR : D3D12_BLEND_SRC_ALPHA;
6009 case QRhiGraphicsPipeline::OneMinusSrcColor:
6010 return rgb ? D3D12_BLEND_INV_SRC_COLOR : D3D12_BLEND_INV_SRC_ALPHA;
6011 case QRhiGraphicsPipeline::DstColor:
6012 return rgb ? D3D12_BLEND_DEST_COLOR : D3D12_BLEND_DEST_ALPHA;
6013 case QRhiGraphicsPipeline::OneMinusDstColor:
6014 return rgb ? D3D12_BLEND_INV_DEST_COLOR : D3D12_BLEND_INV_DEST_ALPHA;
6015 case QRhiGraphicsPipeline::SrcAlpha:
6016 return D3D12_BLEND_SRC_ALPHA;
6017 case QRhiGraphicsPipeline::OneMinusSrcAlpha:
6018 return D3D12_BLEND_INV_SRC_ALPHA;
6019 case QRhiGraphicsPipeline::DstAlpha:
6020 return D3D12_BLEND_DEST_ALPHA;
6021 case QRhiGraphicsPipeline::OneMinusDstAlpha:
6022 return D3D12_BLEND_INV_DEST_ALPHA;
6023 case QRhiGraphicsPipeline::ConstantColor:
6024 case QRhiGraphicsPipeline::ConstantAlpha:
6025 return D3D12_BLEND_BLEND_FACTOR;
6026 case QRhiGraphicsPipeline::OneMinusConstantColor:
6027 case QRhiGraphicsPipeline::OneMinusConstantAlpha:
6028 return D3D12_BLEND_INV_BLEND_FACTOR;
6029 case QRhiGraphicsPipeline::SrcAlphaSaturate:
6030 return D3D12_BLEND_SRC_ALPHA_SAT;
6031 case QRhiGraphicsPipeline::Src1Color:
6032 return rgb ? D3D12_BLEND_SRC1_COLOR : D3D12_BLEND_SRC1_ALPHA;
6033 case QRhiGraphicsPipeline::OneMinusSrc1Color:
6034 return rgb ? D3D12_BLEND_INV_SRC1_COLOR : D3D12_BLEND_INV_SRC1_ALPHA;
6035 case QRhiGraphicsPipeline::Src1Alpha:
6036 return D3D12_BLEND_SRC1_ALPHA;
6037 case QRhiGraphicsPipeline::OneMinusSrc1Alpha:
6038 return D3D12_BLEND_INV_SRC1_ALPHA;
6039 }
6040 Q_UNREACHABLE_RETURN(D3D12_BLEND_ZERO);
6041}
6042
6043static inline D3D12_BLEND_OP toD3DBlendOp(QRhiGraphicsPipeline::BlendOp op)
6044{
6045 switch (op) {
6046 case QRhiGraphicsPipeline::Add:
6047 return D3D12_BLEND_OP_ADD;
6048 case QRhiGraphicsPipeline::Subtract:
6049 return D3D12_BLEND_OP_SUBTRACT;
6050 case QRhiGraphicsPipeline::ReverseSubtract:
6051 return D3D12_BLEND_OP_REV_SUBTRACT;
6052 case QRhiGraphicsPipeline::Min:
6053 return D3D12_BLEND_OP_MIN;
6054 case QRhiGraphicsPipeline::Max:
6055 return D3D12_BLEND_OP_MAX;
6056 }
6057 Q_UNREACHABLE_RETURN(D3D12_BLEND_OP_ADD);
6058}
6059
6060static inline D3D12_CULL_MODE toD3DCullMode(QRhiGraphicsPipeline::CullMode c)
6061{
6062 switch (c) {
6063 case QRhiGraphicsPipeline::None:
6064 return D3D12_CULL_MODE_NONE;
6065 case QRhiGraphicsPipeline::Front:
6066 return D3D12_CULL_MODE_FRONT;
6067 case QRhiGraphicsPipeline::Back:
6068 return D3D12_CULL_MODE_BACK;
6069 }
6070 Q_UNREACHABLE_RETURN(D3D12_CULL_MODE_NONE);
6071}
6072
6073static inline D3D12_FILL_MODE toD3DFillMode(QRhiGraphicsPipeline::PolygonMode mode)
6074{
6075 switch (mode) {
6076 case QRhiGraphicsPipeline::Fill:
6077 return D3D12_FILL_MODE_SOLID;
6078 case QRhiGraphicsPipeline::Line:
6079 return D3D12_FILL_MODE_WIREFRAME;
6080 }
6081 Q_UNREACHABLE_RETURN(D3D12_FILL_MODE_SOLID);
6082}
6083
6084static inline D3D12_COMPARISON_FUNC toD3DCompareOp(QRhiGraphicsPipeline::CompareOp op)
6085{
6086 switch (op) {
6087 case QRhiGraphicsPipeline::Never:
6088 return D3D12_COMPARISON_FUNC_NEVER;
6089 case QRhiGraphicsPipeline::Less:
6090 return D3D12_COMPARISON_FUNC_LESS;
6091 case QRhiGraphicsPipeline::Equal:
6092 return D3D12_COMPARISON_FUNC_EQUAL;
6093 case QRhiGraphicsPipeline::LessOrEqual:
6094 return D3D12_COMPARISON_FUNC_LESS_EQUAL;
6095 case QRhiGraphicsPipeline::Greater:
6096 return D3D12_COMPARISON_FUNC_GREATER;
6097 case QRhiGraphicsPipeline::NotEqual:
6098 return D3D12_COMPARISON_FUNC_NOT_EQUAL;
6099 case QRhiGraphicsPipeline::GreaterOrEqual:
6100 return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
6101 case QRhiGraphicsPipeline::Always:
6102 return D3D12_COMPARISON_FUNC_ALWAYS;
6103 }
6104 Q_UNREACHABLE_RETURN(D3D12_COMPARISON_FUNC_ALWAYS);
6105}
6106
6107static inline D3D12_STENCIL_OP toD3DStencilOp(QRhiGraphicsPipeline::StencilOp op)
6108{
6109 switch (op) {
6110 case QRhiGraphicsPipeline::StencilZero:
6111 return D3D12_STENCIL_OP_ZERO;
6112 case QRhiGraphicsPipeline::Keep:
6113 return D3D12_STENCIL_OP_KEEP;
6114 case QRhiGraphicsPipeline::Replace:
6115 return D3D12_STENCIL_OP_REPLACE;
6116 case QRhiGraphicsPipeline::IncrementAndClamp:
6117 return D3D12_STENCIL_OP_INCR_SAT;
6118 case QRhiGraphicsPipeline::DecrementAndClamp:
6119 return D3D12_STENCIL_OP_DECR_SAT;
6120 case QRhiGraphicsPipeline::Invert:
6121 return D3D12_STENCIL_OP_INVERT;
6122 case QRhiGraphicsPipeline::IncrementAndWrap:
6123 return D3D12_STENCIL_OP_INCR;
6124 case QRhiGraphicsPipeline::DecrementAndWrap:
6125 return D3D12_STENCIL_OP_DECR;
6126 }
6127 Q_UNREACHABLE_RETURN(D3D12_STENCIL_OP_KEEP);
6128}
6129
6130static inline D3D12_PRIMITIVE_TOPOLOGY toD3DTopology(QRhiGraphicsPipeline::Topology t, int patchControlPointCount)
6131{
6132 switch (t) {
6133 case QRhiGraphicsPipeline::Triangles:
6134 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
6135 case QRhiGraphicsPipeline::TriangleStrip:
6136 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
6137 case QRhiGraphicsPipeline::TriangleFan:
6138 qWarning("Triangle fans are not supported with D3D");
6139 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
6140 case QRhiGraphicsPipeline::Lines:
6141 return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
6142 case QRhiGraphicsPipeline::LineStrip:
6143 return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
6144 case QRhiGraphicsPipeline::Points:
6145 return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
6146 case QRhiGraphicsPipeline::Patches:
6147 Q_ASSERT(patchControlPointCount >= 1 && patchControlPointCount <= 32);
6148 return D3D_PRIMITIVE_TOPOLOGY(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (patchControlPointCount - 1));
6149 }
6150 Q_UNREACHABLE_RETURN(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
6151}
6152
6153static inline D3D12_PRIMITIVE_TOPOLOGY_TYPE toD3DTopologyType(QRhiGraphicsPipeline::Topology t)
6154{
6155 switch (t) {
6156 case QRhiGraphicsPipeline::Triangles:
6157 case QRhiGraphicsPipeline::TriangleStrip:
6158 case QRhiGraphicsPipeline::TriangleFan:
6159 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
6160 case QRhiGraphicsPipeline::Lines:
6161 case QRhiGraphicsPipeline::LineStrip:
6162 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
6163 case QRhiGraphicsPipeline::Points:
6164 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
6165 case QRhiGraphicsPipeline::Patches:
6166 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
6167 }
6168 Q_UNREACHABLE_RETURN(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
6169}
6170
6171static inline DXGI_FORMAT toD3DAttributeFormat(QRhiVertexInputAttribute::Format format)
6172{
6173 switch (format) {
6174 case QRhiVertexInputAttribute::Float4:
6175 return DXGI_FORMAT_R32G32B32A32_FLOAT;
6176 case QRhiVertexInputAttribute::Float3:
6177 return DXGI_FORMAT_R32G32B32_FLOAT;
6178 case QRhiVertexInputAttribute::Float2:
6179 return DXGI_FORMAT_R32G32_FLOAT;
6180 case QRhiVertexInputAttribute::Float:
6181 return DXGI_FORMAT_R32_FLOAT;
6182 case QRhiVertexInputAttribute::UNormByte4:
6183 return DXGI_FORMAT_R8G8B8A8_UNORM;
6184 case QRhiVertexInputAttribute::UNormByte2:
6185 return DXGI_FORMAT_R8G8_UNORM;
6186 case QRhiVertexInputAttribute::UNormByte:
6187 return DXGI_FORMAT_R8_UNORM;
6188 case QRhiVertexInputAttribute::UInt4:
6189 return DXGI_FORMAT_R32G32B32A32_UINT;
6190 case QRhiVertexInputAttribute::UInt3:
6191 return DXGI_FORMAT_R32G32B32_UINT;
6192 case QRhiVertexInputAttribute::UInt2:
6193 return DXGI_FORMAT_R32G32_UINT;
6194 case QRhiVertexInputAttribute::UInt:
6195 return DXGI_FORMAT_R32_UINT;
6196 case QRhiVertexInputAttribute::SInt4:
6197 return DXGI_FORMAT_R32G32B32A32_SINT;
6198 case QRhiVertexInputAttribute::SInt3:
6199 return DXGI_FORMAT_R32G32B32_SINT;
6200 case QRhiVertexInputAttribute::SInt2:
6201 return DXGI_FORMAT_R32G32_SINT;
6202 case QRhiVertexInputAttribute::SInt:
6203 return DXGI_FORMAT_R32_SINT;
6204 case QRhiVertexInputAttribute::Half4:
6205 // Note: D3D does not support half3. Pass through half3 as half4.
6206 case QRhiVertexInputAttribute::Half3:
6207 return DXGI_FORMAT_R16G16B16A16_FLOAT;
6208 case QRhiVertexInputAttribute::Half2:
6209 return DXGI_FORMAT_R16G16_FLOAT;
6210 case QRhiVertexInputAttribute::Half:
6211 return DXGI_FORMAT_R16_FLOAT;
6212 case QRhiVertexInputAttribute::UShort4:
6213 // Note: D3D does not support UShort3. Pass through UShort3 as UShort4.
6214 case QRhiVertexInputAttribute::UShort3:
6215 return DXGI_FORMAT_R16G16B16A16_UINT;
6216 case QRhiVertexInputAttribute::UShort2:
6217 return DXGI_FORMAT_R16G16_UINT;
6218 case QRhiVertexInputAttribute::UShort:
6219 return DXGI_FORMAT_R16_UINT;
6220 case QRhiVertexInputAttribute::SShort4:
6221 // Note: D3D does not support SShort3. Pass through SShort3 as SShort4.
6222 case QRhiVertexInputAttribute::SShort3:
6223 return DXGI_FORMAT_R16G16B16A16_SINT;
6224 case QRhiVertexInputAttribute::SShort2:
6225 return DXGI_FORMAT_R16G16_SINT;
6226 case QRhiVertexInputAttribute::SShort:
6227 return DXGI_FORMAT_R16_SINT;
6228 }
6229 Q_UNREACHABLE_RETURN(DXGI_FORMAT_R32G32B32A32_FLOAT);
6230}
6231
6232QD3D12GraphicsPipeline::QD3D12GraphicsPipeline(QRhiImplementation *rhi)
6233 : QRhiGraphicsPipeline(rhi)
6234{
6235}
6236
6237QD3D12GraphicsPipeline::~QD3D12GraphicsPipeline()
6238{
6239 destroy();
6240}
6241
6242void QD3D12GraphicsPipeline::destroy()
6243{
6244 if (handle.isNull())
6245 return;
6246
6247 QRHI_RES_RHI(QRhiD3D12);
6248 if (rhiD) {
6249 rhiD->releaseQueue.deferredReleasePipeline(handle);
6250 rhiD->releaseQueue.deferredReleaseRootSignature(rootSigHandle);
6251 }
6252
6253 handle = {};
6254 stageData = {};
6255
6256 if (rhiD)
6257 rhiD->unregisterResource(this);
6258}
6259
6260bool QD3D12GraphicsPipeline::create()
6261{
6262 if (!handle.isNull())
6263 destroy();
6264
6265 QRHI_RES_RHI(QRhiD3D12);
6266 if (!rhiD->sanityCheckGraphicsPipeline(this))
6267 return false;
6268
6269 rhiD->pipelineCreationStart();
6270
6271 QByteArray shaderBytecode[5];
6272 for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
6273 const QD3D12Stage d3dStage = qd3d12_stage(shaderStage.type());
6274 stageData[d3dStage].valid = true;
6275 stageData[d3dStage].stage = d3dStage;
6276 auto cacheIt = rhiD->shaderBytecodeCache.data.constFind(shaderStage);
6277 if (cacheIt != rhiD->shaderBytecodeCache.data.constEnd()) {
6278 shaderBytecode[d3dStage] = cacheIt->bytecode;
6279 stageData[d3dStage].nativeResourceBindingMap = cacheIt->nativeResourceBindingMap;
6280 } else {
6281 QString error;
6282 QShaderKey shaderKey;
6283 int compileFlags = 0;
6284 if (m_flags.testFlag(CompileShadersWithDebugInfo))
6285 compileFlags |= int(HlslCompileFlag::WithDebugInfo);
6286 const QByteArray bytecode = compileHlslShaderSource(shaderStage.shader(),
6287 shaderStage.shaderVariant(),
6288 compileFlags,
6289 &error,
6290 &shaderKey);
6291 if (bytecode.isEmpty()) {
6292 qWarning("HLSL graphics shader compilation failed: %s", qPrintable(error));
6293 return false;
6294 }
6295
6296 shaderBytecode[d3dStage] = bytecode;
6297 stageData[d3dStage].nativeResourceBindingMap = shaderStage.shader().nativeResourceBindingMap(shaderKey);
6298 rhiD->shaderBytecodeCache.insertWithCapacityLimit(shaderStage,
6299 { bytecode, stageData[d3dStage].nativeResourceBindingMap });
6300 }
6301 }
6302
6303 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, m_shaderResourceBindings);
6304 if (srbD) {
6305 rootSigHandle = srbD->createRootSignature(stageData.data(), 5);
6306 if (rootSigHandle.isNull()) {
6307 qWarning("Failed to create root signature");
6308 return false;
6309 }
6310 }
6311 ID3D12RootSignature *rootSig = nullptr;
6312 if (QD3D12RootSignature *rs = rhiD->rootSignaturePool.lookupRef(rootSigHandle))
6313 rootSig = rs->rootSig;
6314 if (!rootSig) {
6315 qWarning("Cannot create graphics pipeline state without root signature");
6316 return false;
6317 }
6318
6319 QD3D12RenderPassDescriptor *rpD = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
6320 DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;
6321 if (rpD->colorAttachmentCount > 0) {
6322 format = DXGI_FORMAT(rpD->colorFormat[0]);
6323 } else if (rpD->hasDepthStencil) {
6324 format = DXGI_FORMAT(rpD->dsFormat);
6325 } else {
6326 qWarning("Cannot create graphics pipeline state without color or depthStencil format");
6327 return false;
6328 }
6329 const DXGI_SAMPLE_DESC sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, format);
6330
6331 struct {
6332 QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig;
6333 QD3D12PipelineStateSubObject<D3D12_INPUT_LAYOUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT> inputLayout;
6334 QD3D12PipelineStateSubObject<D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE> primitiveRestartValue;
6335 QD3D12PipelineStateSubObject<D3D12_PRIMITIVE_TOPOLOGY_TYPE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY> primitiveTopology;
6336 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS> VS;
6337 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS> HS;
6338 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS> DS;
6339 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS> GS;
6340 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS> PS;
6341 QD3D12PipelineStateSubObject<D3D12_RASTERIZER_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER> rasterizerState;
6342 QD3D12PipelineStateSubObject<D3D12_DEPTH_STENCIL_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL> depthStencilState;
6343 QD3D12PipelineStateSubObject<D3D12_BLEND_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND> blendState;
6344 QD3D12PipelineStateSubObject<D3D12_RT_FORMAT_ARRAY, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS> rtFormats;
6345 QD3D12PipelineStateSubObject<DXGI_FORMAT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT> dsFormat;
6346 QD3D12PipelineStateSubObject<DXGI_SAMPLE_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC> sampleDesc;
6347 QD3D12PipelineStateSubObject<UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK> sampleMask;
6348 QD3D12PipelineStateSubObject<D3D12_VIEW_INSTANCING_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING> viewInstancingDesc;
6349 } stream;
6350
6351 stream.rootSig.object = rootSig;
6352
6353 QVarLengthArray<D3D12_INPUT_ELEMENT_DESC, 4> inputDescs;
6354 QByteArrayList matrixSliceSemantics;
6355 if (!shaderBytecode[VS].isEmpty()) {
6356 for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes();
6357 it != itEnd; ++it)
6358 {
6359 D3D12_INPUT_ELEMENT_DESC desc = {};
6360 // The output from SPIRV-Cross uses TEXCOORD<location> as the
6361 // semantic, except for matrices that are unrolled into consecutive
6362 // vec2/3/4s attributes and need TEXCOORD<location>_ as
6363 // SemanticName and row/column index as SemanticIndex.
6364 const int matrixSlice = it->matrixSlice();
6365 if (matrixSlice < 0) {
6366 desc.SemanticName = "TEXCOORD";
6367 desc.SemanticIndex = UINT(it->location());
6368 } else {
6369 QByteArray sem;
6370 sem.resize(16);
6371 std::snprintf(sem.data(), sem.size(), "TEXCOORD%d_", it->location() - matrixSlice);
6372 matrixSliceSemantics.append(sem);
6373 desc.SemanticName = matrixSliceSemantics.last().constData();
6374 desc.SemanticIndex = UINT(matrixSlice);
6375 }
6376 desc.Format = toD3DAttributeFormat(it->format());
6377 desc.InputSlot = UINT(it->binding());
6378 desc.AlignedByteOffset = it->offset();
6379 const QRhiVertexInputBinding *inputBinding = m_vertexInputLayout.bindingAt(it->binding());
6380 if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance) {
6381 desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
6382 desc.InstanceDataStepRate = inputBinding->instanceStepRate();
6383 } else {
6384 desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
6385 }
6386 inputDescs.append(desc);
6387 }
6388 }
6389
6390 stream.inputLayout.object.NumElements = inputDescs.count();
6391 stream.inputLayout.object.pInputElementDescs = inputDescs.isEmpty() ? nullptr : inputDescs.constData();
6392
6393 stream.primitiveRestartValue.object = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
6394
6395 stream.primitiveTopology.object = toD3DTopologyType(m_topology);
6396 topology = toD3DTopology(m_topology, m_patchControlPointCount);
6397
6398 for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
6399 const int d3dStage = qd3d12_stage(shaderStage.type());
6400 switch (d3dStage) {
6401 case VS:
6402 stream.VS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6403 stream.VS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6404 break;
6405 case HS:
6406 stream.HS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6407 stream.HS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6408 break;
6409 case DS:
6410 stream.DS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6411 stream.DS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6412 break;
6413 case GS:
6414 stream.GS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6415 stream.GS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6416 break;
6417 case PS:
6418 stream.PS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6419 stream.PS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6420 break;
6421 default:
6422 Q_UNREACHABLE();
6423 break;
6424 }
6425 }
6426
6427 stream.rasterizerState.object.FillMode = toD3DFillMode(m_polygonMode);
6428 stream.rasterizerState.object.CullMode = toD3DCullMode(m_cullMode);
6429 stream.rasterizerState.object.FrontCounterClockwise = m_frontFace == CCW;
6430 stream.rasterizerState.object.DepthBias = m_depthBias;
6431 stream.rasterizerState.object.SlopeScaledDepthBias = m_slopeScaledDepthBias;
6432 stream.rasterizerState.object.DepthClipEnable = m_depthClamp ? FALSE : TRUE;
6433 stream.rasterizerState.object.MultisampleEnable = sampleDesc.Count > 1;
6434
6435 stream.depthStencilState.object.DepthEnable = m_depthTest;
6436 stream.depthStencilState.object.DepthWriteMask = m_depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
6437 stream.depthStencilState.object.DepthFunc = toD3DCompareOp(m_depthOp);
6438 stream.depthStencilState.object.StencilEnable = m_stencilTest;
6439 if (m_stencilTest) {
6440 stream.depthStencilState.object.StencilReadMask = UINT8(m_stencilReadMask);
6441 stream.depthStencilState.object.StencilWriteMask = UINT8(m_stencilWriteMask);
6442 stream.depthStencilState.object.FrontFace.StencilFailOp = toD3DStencilOp(m_stencilFront.failOp);
6443 stream.depthStencilState.object.FrontFace.StencilDepthFailOp = toD3DStencilOp(m_stencilFront.depthFailOp);
6444 stream.depthStencilState.object.FrontFace.StencilPassOp = toD3DStencilOp(m_stencilFront.passOp);
6445 stream.depthStencilState.object.FrontFace.StencilFunc = toD3DCompareOp(m_stencilFront.compareOp);
6446 stream.depthStencilState.object.BackFace.StencilFailOp = toD3DStencilOp(m_stencilBack.failOp);
6447 stream.depthStencilState.object.BackFace.StencilDepthFailOp = toD3DStencilOp(m_stencilBack.depthFailOp);
6448 stream.depthStencilState.object.BackFace.StencilPassOp = toD3DStencilOp(m_stencilBack.passOp);
6449 stream.depthStencilState.object.BackFace.StencilFunc = toD3DCompareOp(m_stencilBack.compareOp);
6450 }
6451
6452 stream.blendState.object.IndependentBlendEnable = m_targetBlends.count() > 1;
6453 for (int i = 0, ie = m_targetBlends.count(); i != ie; ++i) {
6454 const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[i]);
6455 D3D12_RENDER_TARGET_BLEND_DESC blend = {};
6456 blend.BlendEnable = b.enable;
6457 blend.SrcBlend = toD3DBlendFactor(b.srcColor, true);
6458 blend.DestBlend = toD3DBlendFactor(b.dstColor, true);
6459 blend.BlendOp = toD3DBlendOp(b.opColor);
6460 blend.SrcBlendAlpha = toD3DBlendFactor(b.srcAlpha, false);
6461 blend.DestBlendAlpha = toD3DBlendFactor(b.dstAlpha, false);
6462 blend.BlendOpAlpha = toD3DBlendOp(b.opAlpha);
6463 blend.RenderTargetWriteMask = toD3DColorWriteMask(b.colorWrite);
6464 stream.blendState.object.RenderTarget[i] = blend;
6465 }
6466 if (m_targetBlends.isEmpty()) {
6467 D3D12_RENDER_TARGET_BLEND_DESC blend = {};
6468 blend.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
6469 stream.blendState.object.RenderTarget[0] = blend;
6470 }
6471
6472 stream.rtFormats.object.NumRenderTargets = rpD->colorAttachmentCount;
6473 for (int i = 0; i < rpD->colorAttachmentCount; ++i)
6474 stream.rtFormats.object.RTFormats[i] = DXGI_FORMAT(rpD->colorFormat[i]);
6475
6476 stream.dsFormat.object = rpD->hasDepthStencil ? DXGI_FORMAT(rpD->dsFormat) : DXGI_FORMAT_UNKNOWN;
6477
6478 stream.sampleDesc.object = sampleDesc;
6479
6480 stream.sampleMask.object = 0xFFFFFFFF;
6481
6482 viewInstanceMask = 0;
6483 const bool isMultiView = m_multiViewCount >= 2;
6484 stream.viewInstancingDesc.object.ViewInstanceCount = isMultiView ? m_multiViewCount : 0;
6485 QVarLengthArray<D3D12_VIEW_INSTANCE_LOCATION, 4> viewInstanceLocations;
6486 if (isMultiView) {
6487 for (int i = 0; i < m_multiViewCount; ++i) {
6488 viewInstanceMask |= (1 << i);
6489 viewInstanceLocations.append({ 0, UINT(i) });
6490 }
6491 stream.viewInstancingDesc.object.pViewInstanceLocations = viewInstanceLocations.constData();
6492 }
6493
6494 const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream };
6495
6496 ID3D12PipelineState *pso = nullptr;
6497 HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso));
6498 if (FAILED(hr)) {
6499 qWarning("Failed to create graphics pipeline state: %s",
6500 qPrintable(QSystemError::windowsComString(hr)));
6501 rhiD->rootSignaturePool.remove(rootSigHandle);
6502 rootSigHandle = {};
6503 return false;
6504 }
6505
6506 handle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Graphics, pso);
6507
6508 rhiD->pipelineCreationEnd();
6509 generation += 1;
6510 rhiD->registerResource(this);
6511 return true;
6512}
6513
6514QD3D12ComputePipeline::QD3D12ComputePipeline(QRhiImplementation *rhi)
6515 : QRhiComputePipeline(rhi)
6516{
6517}
6518
6519QD3D12ComputePipeline::~QD3D12ComputePipeline()
6520{
6521 destroy();
6522}
6523
6524void QD3D12ComputePipeline::destroy()
6525{
6526 if (handle.isNull())
6527 return;
6528
6529 QRHI_RES_RHI(QRhiD3D12);
6530 if (rhiD) {
6531 rhiD->releaseQueue.deferredReleasePipeline(handle);
6532 rhiD->releaseQueue.deferredReleaseRootSignature(rootSigHandle);
6533 }
6534
6535 handle = {};
6536 stageData = {};
6537
6538 if (rhiD)
6539 rhiD->unregisterResource(this);
6540}
6541
6542bool QD3D12ComputePipeline::create()
6543{
6544 if (!handle.isNull())
6545 destroy();
6546
6547 QRHI_RES_RHI(QRhiD3D12);
6548 rhiD->pipelineCreationStart();
6549
6550 stageData.valid = true;
6551 stageData.stage = CS;
6552
6553 QByteArray shaderBytecode;
6554 auto cacheIt = rhiD->shaderBytecodeCache.data.constFind(m_shaderStage);
6555 if (cacheIt != rhiD->shaderBytecodeCache.data.constEnd()) {
6556 shaderBytecode = cacheIt->bytecode;
6557 stageData.nativeResourceBindingMap = cacheIt->nativeResourceBindingMap;
6558 } else {
6559 QString error;
6560 QShaderKey shaderKey;
6561 int compileFlags = 0;
6562 if (m_flags.testFlag(CompileShadersWithDebugInfo))
6563 compileFlags |= int(HlslCompileFlag::WithDebugInfo);
6564 const QByteArray bytecode = compileHlslShaderSource(m_shaderStage.shader(),
6565 m_shaderStage.shaderVariant(),
6566 compileFlags,
6567 &error,
6568 &shaderKey);
6569 if (bytecode.isEmpty()) {
6570 qWarning("HLSL compute shader compilation failed: %s", qPrintable(error));
6571 return false;
6572 }
6573
6574 shaderBytecode = bytecode;
6575 stageData.nativeResourceBindingMap = m_shaderStage.shader().nativeResourceBindingMap(shaderKey);
6576 rhiD->shaderBytecodeCache.insertWithCapacityLimit(m_shaderStage, { bytecode,
6577 stageData.nativeResourceBindingMap });
6578 }
6579
6580 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, m_shaderResourceBindings);
6581 if (srbD) {
6582 rootSigHandle = srbD->createRootSignature(&stageData, 1);
6583 if (rootSigHandle.isNull()) {
6584 qWarning("Failed to create root signature");
6585 return false;
6586 }
6587 }
6588 ID3D12RootSignature *rootSig = nullptr;
6589 if (QD3D12RootSignature *rs = rhiD->rootSignaturePool.lookupRef(rootSigHandle))
6590 rootSig = rs->rootSig;
6591 if (!rootSig) {
6592 qWarning("Cannot create compute pipeline state without root signature");
6593 return false;
6594 }
6595
6596 struct {
6597 QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig;
6598 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS> CS;
6599 } stream;
6600 stream.rootSig.object = rootSig;
6601 stream.CS.object.pShaderBytecode = shaderBytecode.constData();
6602 stream.CS.object.BytecodeLength = shaderBytecode.size();
6603 const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream };
6604 ID3D12PipelineState *pso = nullptr;
6605 HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso));
6606 if (FAILED(hr)) {
6607 qWarning("Failed to create compute pipeline state: %s",
6608 qPrintable(QSystemError::windowsComString(hr)));
6609 rhiD->rootSignaturePool.remove(rootSigHandle);
6610 rootSigHandle = {};
6611 return false;
6612 }
6613
6614 handle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
6615
6616 rhiD->pipelineCreationEnd();
6617 generation += 1;
6618 rhiD->registerResource(this);
6619 return true;
6620}
6621
// This is a lot like in the Metal backend: we need to know the rtv and dsv
// formats to create a graphics pipeline, and that's exactly what our
// "renderpass descriptor" is going to hold.
6625QD3D12RenderPassDescriptor::QD3D12RenderPassDescriptor(QRhiImplementation *rhi)
6626 : QRhiRenderPassDescriptor(rhi)
6627{
6628 serializedFormatData.reserve(16);
6629}
6630
6631QD3D12RenderPassDescriptor::~QD3D12RenderPassDescriptor()
6632{
6633 destroy();
6634}
6635
6636void QD3D12RenderPassDescriptor::destroy()
6637{
6638 QRHI_RES_RHI(QRhiD3D12);
6639 if (rhiD)
6640 rhiD->unregisterResource(this);
6641}
6642
6643bool QD3D12RenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const
6644{
6645 if (!other)
6646 return false;
6647
6648 const QD3D12RenderPassDescriptor *o = QRHI_RES(const QD3D12RenderPassDescriptor, other);
6649
6650 if (colorAttachmentCount != o->colorAttachmentCount)
6651 return false;
6652
6653 if (hasDepthStencil != o->hasDepthStencil)
6654 return false;
6655
6656 for (int i = 0; i < colorAttachmentCount; ++i) {
6657 if (colorFormat[i] != o->colorFormat[i])
6658 return false;
6659 }
6660
6661 if (hasDepthStencil) {
6662 if (dsFormat != o->dsFormat)
6663 return false;
6664 }
6665
6666 if (hasShadingRateMap != o->hasShadingRateMap)
6667 return false;
6668
6669 return true;
6670}
6671
6672void QD3D12RenderPassDescriptor::updateSerializedFormat()
6673{
6674 serializedFormatData.clear();
6675 auto p = std::back_inserter(serializedFormatData);
6676
6677 *p++ = colorAttachmentCount;
6678 *p++ = hasDepthStencil;
6679 for (int i = 0; i < colorAttachmentCount; ++i)
6680 *p++ = colorFormat[i];
6681 *p++ = hasDepthStencil ? dsFormat : 0;
6682}
6683
6684QRhiRenderPassDescriptor *QD3D12RenderPassDescriptor::newCompatibleRenderPassDescriptor() const
6685{
6686 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
6687 rpD->colorAttachmentCount = colorAttachmentCount;
6688 rpD->hasDepthStencil = hasDepthStencil;
6689 memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat));
6690 rpD->dsFormat = dsFormat;
6691 rpD->hasShadingRateMap = hasShadingRateMap;
6692
6693 rpD->updateSerializedFormat();
6694
6695 QRHI_RES_RHI(QRhiD3D12);
6696 rhiD->registerResource(rpD);
6697 return rpD;
6698}
6699
6700QVector<quint32> QD3D12RenderPassDescriptor::serializedFormat() const
6701{
6702 return serializedFormatData;
6703}
6704
6705QD3D12CommandBuffer::QD3D12CommandBuffer(QRhiImplementation *rhi)
6706 : QRhiCommandBuffer(rhi)
6707{
6708 resetState();
6709}
6710
6711QD3D12CommandBuffer::~QD3D12CommandBuffer()
6712{
6713 destroy();
6714}
6715
6716void QD3D12CommandBuffer::destroy()
6717{
6718 // nothing to do here, the command list is not owned by us
6719}
6720
6721const QRhiNativeHandles *QD3D12CommandBuffer::nativeHandles()
6722{
6723 nativeHandlesStruct.commandList = cmdList;
6724 return &nativeHandlesStruct;
6725}
6726
6727QD3D12SwapChainRenderTarget::QD3D12SwapChainRenderTarget(QRhiImplementation *rhi, QRhiSwapChain *swapchain)
6728 : QRhiSwapChainRenderTarget(rhi, swapchain),
6729 d(rhi)
6730{
6731}
6732
6733QD3D12SwapChainRenderTarget::~QD3D12SwapChainRenderTarget()
6734{
6735 destroy();
6736}
6737
6738void QD3D12SwapChainRenderTarget::destroy()
6739{
6740 // nothing to do here
6741}
6742
6743QSize QD3D12SwapChainRenderTarget::pixelSize() const
6744{
6745 return d.pixelSize;
6746}
6747
6748float QD3D12SwapChainRenderTarget::devicePixelRatio() const
6749{
6750 return d.dpr;
6751}
6752
6753int QD3D12SwapChainRenderTarget::sampleCount() const
6754{
6755 return d.sampleCount;
6756}
6757
6758QD3D12SwapChain::QD3D12SwapChain(QRhiImplementation *rhi)
6759 : QRhiSwapChain(rhi),
6760 rtWrapper(rhi, this),
6761 rtWrapperRight(rhi, this),
6762 cbWrapper(rhi)
6763{
6764}
6765
6766QD3D12SwapChain::~QD3D12SwapChain()
6767{
6768 destroy();
6769}
6770
6771void QD3D12SwapChain::destroy()
6772{
6773 if (!swapChain)
6774 return;
6775
6776 releaseBuffers();
6777
6778 swapChain->Release();
6779 swapChain = nullptr;
6780 sourceSwapChain1->Release();
6781 sourceSwapChain1 = nullptr;
6782
6783 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
6784 FrameResources &fr(frameRes[i]);
6785 if (fr.fence)
6786 fr.fence->Release();
6787 if (fr.fenceEvent)
6788 CloseHandle(fr.fenceEvent);
6789 if (fr.cmdList)
6790 fr.cmdList->Release();
6791 fr = {};
6792 }
6793
6794 if (dcompVisual) {
6795 dcompVisual->Release();
6796 dcompVisual = nullptr;
6797 }
6798
6799 if (dcompTarget) {
6800 dcompTarget->Release();
6801 dcompTarget = nullptr;
6802 }
6803
6804 if (frameLatencyWaitableObject) {
6805 CloseHandle(frameLatencyWaitableObject);
6806 frameLatencyWaitableObject = nullptr;
6807 }
6808
6809 QDxgiVSyncService::instance()->unregisterWindow(window);
6810
6811 QRHI_RES_RHI(QRhiD3D12);
6812 if (rhiD) {
6813 rhiD->swapchains.remove(this);
6814 rhiD->unregisterResource(this);
6815 }
6816}
6817
6818void QD3D12SwapChain::releaseBuffers()
6819{
6820 QRHI_RES_RHI(QRhiD3D12);
6821 rhiD->waitGpu();
6822 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
6823 rhiD->resourcePool.remove(colorBuffers[i]);
6824 rhiD->rtvPool.release(rtvs[i], 1);
6825 if (stereo)
6826 rhiD->rtvPool.release(rtvsRight[i], 1);
6827 if (!msaaBuffers[i].isNull())
6828 rhiD->resourcePool.remove(msaaBuffers[i]);
6829 if (msaaRtvs[i].isValid())
6830 rhiD->rtvPool.release(msaaRtvs[i], 1);
6831 }
6832}
6833
6834void QD3D12SwapChain::waitCommandCompletionForFrameSlot(int frameSlot)
6835{
6836 FrameResources &fr(frameRes[frameSlot]);
6837 if (fr.fence->GetCompletedValue() < fr.fenceCounter) {
6838 fr.fence->SetEventOnCompletion(fr.fenceCounter, fr.fenceEvent);
6839 WaitForSingleObject(fr.fenceEvent, INFINITE);
6840 }
6841}
6842
6843void QD3D12SwapChain::addCommandCompletionSignalForCurrentFrameSlot()
6844{
6845 QRHI_RES_RHI(QRhiD3D12);
6846 FrameResources &fr(frameRes[currentFrameSlot]);
6847 fr.fenceCounter += 1u;
6848 rhiD->cmdQueue->Signal(fr.fence, fr.fenceCounter);
6849}
6850
6851QRhiCommandBuffer *QD3D12SwapChain::currentFrameCommandBuffer()
6852{
6853 return &cbWrapper;
6854}
6855
6856QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget()
6857{
6858 return &rtWrapper;
6859}
6860
6861QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget(StereoTargetBuffer targetBuffer)
6862{
6863 return !stereo || targetBuffer == StereoTargetBuffer::LeftBuffer ? &rtWrapper : &rtWrapperRight;
6864}
6865
6866QSize QD3D12SwapChain::surfacePixelSize()
6867{
6868 Q_ASSERT(m_window);
6869 return m_window->size() * m_window->devicePixelRatio();
6870}
6871
6872bool QD3D12SwapChain::isFormatSupported(Format f)
6873{
6874 if (f == SDR)
6875 return true;
6876
6877 if (!m_window) {
6878 qWarning("Attempted to call isFormatSupported() without a window set");
6879 return false;
6880 }
6881
6882 QRHI_RES_RHI(QRhiD3D12);
6883 if (QDxgiHdrInfo(rhiD->activeAdapter).isHdrCapable(m_window))
6884 return f == QRhiSwapChain::HDRExtendedSrgbLinear || f == QRhiSwapChain::HDR10;
6885
6886 return false;
6887}
6888
6889QRhiSwapChainHdrInfo QD3D12SwapChain::hdrInfo()
6890{
6891 QRhiSwapChainHdrInfo info = QRhiSwapChain::hdrInfo();
6892 // Must use m_window, not window, given this may be called before createOrResize().
6893 if (m_window) {
6894 QRHI_RES_RHI(QRhiD3D12);
6895 info = QDxgiHdrInfo(rhiD->activeAdapter).queryHdrInfo(m_window);
6896 }
6897 return info;
6898}
6899
6900QRhiRenderPassDescriptor *QD3D12SwapChain::newCompatibleRenderPassDescriptor()
6901{
6902 // not yet built so cannot rely on data computed in createOrResize()
6903 chooseFormats();
6904
6905 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
6906 rpD->colorAttachmentCount = 1;
6907 rpD->hasDepthStencil = m_depthStencil != nullptr;
6908 rpD->colorFormat[0] = int(srgbAdjustedColorFormat);
6909 rpD->dsFormat = QD3D12RenderBuffer::DS_FORMAT;
6910
6911 rpD->hasShadingRateMap = m_shadingRateMap != nullptr;
6912
6913 rpD->updateSerializedFormat();
6914
6915 QRHI_RES_RHI(QRhiD3D12);
6916 rhiD->registerResource(rpD);
6917 return rpD;
6918}
6919
6920bool QRhiD3D12::ensureDirectCompositionDevice()
6921{
6922 if (dcompDevice)
6923 return true;
6924
6925 qCDebug(QRHI_LOG_INFO, "Creating Direct Composition device (needed for semi-transparent windows)");
6926 dcompDevice = QRhiD3D::createDirectCompositionDevice();
6927 return dcompDevice ? true : false;
6928}
6929
6930static const DXGI_FORMAT DEFAULT_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM;
6931static const DXGI_FORMAT DEFAULT_SRGB_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
6932
6933void QD3D12SwapChain::chooseFormats()
6934{
6935 colorFormat = DEFAULT_FORMAT;
6936 srgbAdjustedColorFormat = m_flags.testFlag(sRGB) ? DEFAULT_SRGB_FORMAT : DEFAULT_FORMAT;
6937 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // SDR
6938 QRHI_RES_RHI(QRhiD3D12);
6939 if (m_format != SDR) {
6940 if (QDxgiHdrInfo(rhiD->activeAdapter).isHdrCapable(m_window)) {
6941 // https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range
6942 switch (m_format) {
6943 case HDRExtendedSrgbLinear:
6944 colorFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
6945 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709;
6946 srgbAdjustedColorFormat = colorFormat;
6947 break;
6948 case HDR10:
6949 colorFormat = DXGI_FORMAT_R10G10B10A2_UNORM;
6950 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020;
6951 srgbAdjustedColorFormat = colorFormat;
6952 break;
6953 default:
6954 break;
6955 }
6956 } else {
6957 // This happens also when Use HDR is set to Off in the Windows
6958 // Display settings. Show a helpful warning, but continue with the
6959 // default non-HDR format.
6960 qWarning("The output associated with the window is not HDR capable "
6961 "(or Use HDR is Off in the Display Settings), ignoring HDR format request");
6962 }
6963 }
6964 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, colorFormat);
6965}
6966
6967bool QD3D12SwapChain::createOrResize()
6968{
6969 // Can be called multiple times due to window resizes - that is not the
6970 // same as a simple destroy+create (as with other resources). Just need to
6971 // resize the buffers then.
6972
6973 const bool needsRegistration = !window || window != m_window;
6974
6975 // except if the window actually changes
6976 if (window && window != m_window)
6977 destroy();
6978
6979 window = m_window;
6980 m_currentPixelSize = surfacePixelSize();
6981 pixelSize = m_currentPixelSize;
6982
6983 if (pixelSize.isEmpty())
6984 return false;
6985
6986 HWND hwnd = reinterpret_cast<HWND>(window->winId());
6987 HRESULT hr;
6988 QRHI_RES_RHI(QRhiD3D12);
6989 stereo = m_window->format().stereo() && rhiD->dxgiFactory->IsWindowedStereoEnabled();
6990
6991 if (m_flags.testFlag(SurfaceHasPreMulAlpha) || m_flags.testFlag(SurfaceHasNonPreMulAlpha)) {
6992 if (rhiD->ensureDirectCompositionDevice()) {
6993 if (!dcompTarget) {
6994 hr = rhiD->dcompDevice->CreateTargetForHwnd(hwnd, false, &dcompTarget);
6995 if (FAILED(hr)) {
6996 qWarning("Failed to create Direct Composition target for the window: %s",
6997 qPrintable(QSystemError::windowsComString(hr)));
6998 }
6999 }
7000 if (dcompTarget && !dcompVisual) {
7001 hr = rhiD->dcompDevice->CreateVisual(&dcompVisual);
7002 if (FAILED(hr)) {
7003 qWarning("Failed to create DirectComposition visual: %s",
7004 qPrintable(QSystemError::windowsComString(hr)));
7005 }
7006 }
7007 }
7008 // simple consistency check
7009 if (window->requestedFormat().alphaBufferSize() <= 0)
7010 qWarning("Swapchain says surface has alpha but the window has no alphaBufferSize set. "
7011 "This may lead to problems.");
7012 }
7013
7014 swapInterval = m_flags.testFlag(QRhiSwapChain::NoVSync) ? 0 : 1;
7015 swapChainFlags = 0;
7016 if (swapInterval == 0 && rhiD->supportsAllowTearing)
7017 swapChainFlags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
7018
7019 // maxFrameLatency 0 means no waitable object usage.
7020 // Ignore it also when NoVSync is on, and when using WARP.
7021 const bool useFrameLatencyWaitableObject = rhiD->maxFrameLatency != 0
7022 && swapInterval != 0
7023 && rhiD->driverInfoStruct.deviceType != QRhiDriverInfo::CpuDevice;
7024 if (useFrameLatencyWaitableObject)
7025 swapChainFlags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
7026
7027 if (!swapChain) {
7028 chooseFormats();
7029
7030 DXGI_SWAP_CHAIN_DESC1 desc = {};
7031 desc.Width = UINT(pixelSize.width());
7032 desc.Height = UINT(pixelSize.height());
7033 desc.Format = colorFormat;
7034 desc.SampleDesc.Count = 1;
7035 desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
7036 desc.BufferCount = BUFFER_COUNT;
7037 desc.Flags = swapChainFlags;
7038 desc.Scaling = DXGI_SCALING_NONE;
7039 desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
7040 desc.Stereo = stereo;
7041
7042 if (dcompVisual) {
7043 // With DirectComposition setting AlphaMode to STRAIGHT fails the
7044 // swapchain creation, whereas the result seems to be identical
7045 // with any of the other values, including IGNORE. (?)
7046 desc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
7047
7048 // DirectComposition has its own limitations, cannot use
7049 // SCALING_NONE. So with semi-transparency requested we are forced
7050 // to SCALING_STRETCH.
7051 desc.Scaling = DXGI_SCALING_STRETCH;
7052 }
7053
7054 if (dcompVisual)
7055 hr = rhiD->dxgiFactory->CreateSwapChainForComposition(rhiD->cmdQueue, &desc, nullptr, &sourceSwapChain1);
7056 else
7057 hr = rhiD->dxgiFactory->CreateSwapChainForHwnd(rhiD->cmdQueue, hwnd, &desc, nullptr, nullptr, &sourceSwapChain1);
7058
7059 // If failed and we tried a HDR format, then try with SDR. This
7060 // matches other backends, such as Vulkan where if the format is
7061 // not supported, the default one is used instead.
7062 if (FAILED(hr) && m_format != SDR) {
7063 colorFormat = DEFAULT_FORMAT;
7064 desc.Format = DEFAULT_FORMAT;
7065 if (dcompVisual)
7066 hr = rhiD->dxgiFactory->CreateSwapChainForComposition(rhiD->cmdQueue, &desc, nullptr, &sourceSwapChain1);
7067 else
7068 hr = rhiD->dxgiFactory->CreateSwapChainForHwnd(rhiD->cmdQueue, hwnd, &desc, nullptr, nullptr, &sourceSwapChain1);
7069 }
7070
7071 if (SUCCEEDED(hr)) {
7072 if (FAILED(sourceSwapChain1->QueryInterface(__uuidof(IDXGISwapChain3), reinterpret_cast<void **>(&swapChain)))) {
7073 qWarning("IDXGISwapChain3 not available");
7074 return false;
7075 }
7076 if (m_format != SDR) {
7077 hr = swapChain->SetColorSpace1(hdrColorSpace);
7078 if (FAILED(hr)) {
7079 qWarning("Failed to set color space on swapchain: %s",
7080 qPrintable(QSystemError::windowsComString(hr)));
7081 }
7082 }
7083 if (useFrameLatencyWaitableObject) {
7084 swapChain->SetMaximumFrameLatency(rhiD->maxFrameLatency);
7085 frameLatencyWaitableObject = swapChain->GetFrameLatencyWaitableObject();
7086 }
7087 if (dcompVisual) {
7088 hr = dcompVisual->SetContent(swapChain);
7089 if (SUCCEEDED(hr)) {
7090 hr = dcompTarget->SetRoot(dcompVisual);
7091 if (FAILED(hr)) {
7092 qWarning("Failed to associate Direct Composition visual with the target: %s",
7093 qPrintable(QSystemError::windowsComString(hr)));
7094 }
7095 } else {
7096 qWarning("Failed to set content for Direct Composition visual: %s",
7097 qPrintable(QSystemError::windowsComString(hr)));
7098 }
7099 } else {
7100 // disable Alt+Enter; not relevant when using DirectComposition
7101 rhiD->dxgiFactory->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
7102 }
7103 }
7104 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
7105 qWarning("Device loss detected during swapchain creation");
7106 rhiD->deviceLost = true;
7107 return false;
7108 } else if (FAILED(hr)) {
7109 qWarning("Failed to create D3D12 swapchain: %s"
7110 " (Width=%u Height=%u Format=%u SampleCount=%u BufferCount=%u Scaling=%u SwapEffect=%u Stereo=%u)",
7111 qPrintable(QSystemError::windowsComString(hr)),
7112 desc.Width, desc.Height, UINT(desc.Format), desc.SampleDesc.Count,
7113 desc.BufferCount, UINT(desc.Scaling), UINT(desc.SwapEffect), UINT(desc.Stereo));
7114 return false;
7115 }
7116
7117 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
7118 hr = rhiD->dev->CreateFence(0,
7119 D3D12_FENCE_FLAG_NONE,
7120 __uuidof(ID3D12Fence),
7121 reinterpret_cast<void **>(&frameRes[i].fence));
7122 if (FAILED(hr)) {
7123 qWarning("Failed to create fence for swapchain: %s",
7124 qPrintable(QSystemError::windowsComString(hr)));
7125 return false;
7126 }
7127 frameRes[i].fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
7128
7129 frameRes[i].fenceCounter = 0;
7130 }
7131 } else {
7132 releaseBuffers();
7133 hr = swapChain->ResizeBuffers(BUFFER_COUNT,
7134 UINT(pixelSize.width()),
7135 UINT(pixelSize.height()),
7136 colorFormat,
7137 swapChainFlags);
7138 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
7139 qWarning("Device loss detected in ResizeBuffers()");
7140 rhiD->deviceLost = true;
7141 return false;
7142 } else if (FAILED(hr)) {
7143 qWarning("Failed to resize D3D12 swapchain: %s", qPrintable(QSystemError::windowsComString(hr)));
7144 return false;
7145 }
7146 }
7147
7148 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
7149 ID3D12Resource *colorBuffer;
7150 hr = swapChain->GetBuffer(i, __uuidof(ID3D12Resource), reinterpret_cast<void **>(&colorBuffer));
7151 if (FAILED(hr)) {
7152 qWarning("Failed to get buffer %u for D3D12 swapchain: %s",
7153 i, qPrintable(QSystemError::windowsComString(hr)));
7154 return false;
7155 }
7156 colorBuffers[i] = QD3D12Resource::addToPool(&rhiD->resourcePool, colorBuffer, D3D12_RESOURCE_STATE_PRESENT);
7157 rtvs[i] = rhiD->rtvPool.allocate(1);
7158 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7159 rtvDesc.Format = srgbAdjustedColorFormat;
7160 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
7161 rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvs[i].cpuHandle);
7162
7163 if (stereo) {
7164 rtvsRight[i] = rhiD->rtvPool.allocate(1);
7165 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7166 rtvDesc.Format = srgbAdjustedColorFormat;
7167 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
7168 rtvDesc.Texture2DArray.ArraySize = 1;
7169 rtvDesc.Texture2DArray.FirstArraySlice = 1;
7170 rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvsRight[i].cpuHandle);
7171 }
7172 }
7173
7174 if (m_depthStencil && m_depthStencil->sampleCount() != m_sampleCount) {
7175 qWarning("Depth-stencil buffer's sampleCount (%d) does not match color buffers' sample count (%d). Expect problems.",
7176 m_depthStencil->sampleCount(), m_sampleCount);
7177 }
7178 if (m_depthStencil && m_depthStencil->pixelSize() != pixelSize) {
7179 if (m_depthStencil->flags().testFlag(QRhiRenderBuffer::UsedWithSwapChainOnly)) {
7180 m_depthStencil->setPixelSize(pixelSize);
7181 if (!m_depthStencil->create())
7182 qWarning("Failed to rebuild swapchain's associated depth-stencil buffer for size %dx%d",
7183 pixelSize.width(), pixelSize.height());
7184 } else {
7185 qWarning("Depth-stencil buffer's size (%dx%d) does not match the surface size (%dx%d). Expect problems.",
7186 m_depthStencil->pixelSize().width(), m_depthStencil->pixelSize().height(),
7187 pixelSize.width(), pixelSize.height());
7188 }
7189 }
7190
7191 ds = m_depthStencil ? QRHI_RES(QD3D12RenderBuffer, m_depthStencil) : nullptr;
7192
7193 if (sampleDesc.Count > 1) {
7194 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
7195 D3D12_RESOURCE_DESC resourceDesc = {};
7196 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
7197 resourceDesc.Width = UINT64(pixelSize.width());
7198 resourceDesc.Height = UINT(pixelSize.height());
7199 resourceDesc.DepthOrArraySize = 1;
7200 resourceDesc.MipLevels = 1;
7201 resourceDesc.Format = srgbAdjustedColorFormat;
7202 resourceDesc.SampleDesc = sampleDesc;
7203 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
7204 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
7205 D3D12_CLEAR_VALUE clearValue = {};
7206 clearValue.Format = colorFormat;
7207 ID3D12Resource *resource = nullptr;
7208 D3D12MA::Allocation *allocation = nullptr;
7209 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
7210 &resourceDesc,
7211 D3D12_RESOURCE_STATE_RENDER_TARGET,
7212 &clearValue,
7213 &allocation,
7214 __uuidof(ID3D12Resource),
7215 reinterpret_cast<void **>(&resource));
7216 if (FAILED(hr)) {
7217 qWarning("Failed to create MSAA color buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
7218 return false;
7219 }
7220 msaaBuffers[i] = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_RENDER_TARGET, allocation);
7221 msaaRtvs[i] = rhiD->rtvPool.allocate(1);
7222 if (!msaaRtvs[i].isValid())
7223 return false;
7224 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7225 rtvDesc.Format = srgbAdjustedColorFormat;
7226 rtvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS
7227 : D3D12_RTV_DIMENSION_TEXTURE2D;
7228 rhiD->dev->CreateRenderTargetView(resource, &rtvDesc, msaaRtvs[i].cpuHandle);
7229 }
7230 }
7231
7232 currentBackBufferIndex = swapChain->GetCurrentBackBufferIndex();
7233 currentFrameSlot = 0;
7234 lastFrameLatencyWaitSlot = -1; // wait already in the first frame, as instructed in the dxgi docs
7235
7236 rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget
7237 QD3D12SwapChainRenderTarget *rtD = QRHI_RES(QD3D12SwapChainRenderTarget, &rtWrapper);
7238 rtD->d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
7239 rtD->d.pixelSize = pixelSize;
7240 rtD->d.dpr = float(window->devicePixelRatio());
7241 rtD->d.sampleCount = int(sampleDesc.Count);
7242 rtD->d.colorAttCount = 1;
7243 rtD->d.dsAttCount = m_depthStencil ? 1 : 0;
7244
7245 rtWrapperRight.setRenderPassDescriptor(m_renderPassDesc);
7246 QD3D12SwapChainRenderTarget *rtDr = QRHI_RES(QD3D12SwapChainRenderTarget, &rtWrapperRight);
7247 rtDr->d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
7248 rtDr->d.pixelSize = pixelSize;
7249 rtDr->d.dpr = float(window->devicePixelRatio());
7250 rtDr->d.sampleCount = int(sampleDesc.Count);
7251 rtDr->d.colorAttCount = 1;
7252 rtDr->d.dsAttCount = m_depthStencil ? 1 : 0;
7253
7254 QDxgiVSyncService::instance()->registerWindow(window);
7255
7256 if (needsRegistration || !rhiD->swapchains.contains(this))
7257 rhiD->swapchains.insert(this);
7258
7259 rhiD->registerResource(this);
7260
7261 return true;
7262}
7263
7264QT_END_NAMESPACE
7265
7266#endif // __ID3D12Device2_INTERFACE_DEFINED__
#define __has_include(x)