Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qrhid3d12.cpp
Go to the documentation of this file.
1// Copyright (C) 2023 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:default
4
5#include "qrhid3d12_p.h"
6#include <qmath.h>
7#include <QtCore/private/qsystemerror_p.h>
8#include <comdef.h>
10#include "cs_mipmap_p.h"
11#include "cs_mipmap_3d_p.h"
12
13#if __has_include(<pix.h>)
14#include <pix.h>
15#define QRHI_D3D12_HAS_OLD_PIX
16#endif
17
18#ifdef __ID3D12Device2_INTERFACE_DEFINED__
19
20QT_BEGIN_NAMESPACE
21
22/*
23 Direct 3D 12 backend.
24*/
25
26/*!
27 \class QRhiD3D12InitParams
28 \inmodule QtGuiPrivate
29 \inheaderfile rhi/qrhi.h
30 \brief Direct3D 12 specific initialization parameters.
31
32 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
33 for details.
34
35 A D3D12-based QRhi needs no special parameters for initialization. If
36 desired, enableDebugLayer can be set to \c true to enable the Direct3D
37 debug layer. This can be useful during development, but should be avoided
38 in production builds.
39
40 \badcode
41 QRhiD3D12InitParams params;
42 params.enableDebugLayer = true;
43 rhi = QRhi::create(QRhi::D3D12, &params);
44 \endcode
45
46 \note QRhiSwapChain should only be used in combination with QWindow
47 instances that have their surface type set to QSurface::Direct3DSurface.
48
49 \section2 Working with existing Direct3D 12 devices
50
51 When interoperating with another graphics engine, it may be necessary to
52 get a QRhi instance that uses the same Direct3D device. This can be
53 achieved by passing a pointer to a QRhiD3D12NativeHandles to
54 QRhi::create(). QRhi does not take ownership of any of the external
55 objects.
56
57 Sometimes, for example when using QRhi in combination with OpenXR, one will
58 want to specify which adapter to use, and optionally, which feature level
59 to request on the device, while leaving the device creation to QRhi. This
60 is achieved by leaving the device pointer set to null, while specifying the
61 adapter LUID and feature level.
62
63 Optionally the ID3D12CommandQueue can be specified as well, by setting \c
64 commandQueue to a non-null value.
65 */
66
67/*!
68 \variable QRhiD3D12InitParams::enableDebugLayer
69
70 When set to true, the debug layer is enabled, if installed and available.
71 The default value is false.
72*/
73
74/*!
75 \class QRhiD3D12NativeHandles
76 \inmodule QtGuiPrivate
77 \inheaderfile rhi/qrhi.h
78 \brief Holds the D3D12 device used by the QRhi.
79
80 \note The class uses \c{void *} as the type since including the COM-based
81 \c{d3d12.h} headers is not acceptable here. The actual types are
82 \c{ID3D12Device *} and \c{ID3D12CommandQueue *}.
83
84 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
85 for details.
86 */
87
88/*!
89 \variable QRhiD3D12NativeHandles::dev
90
91 Points to a
92 \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device}{ID3D12Device}
93 or left set to \nullptr if no existing device is to be imported.
94*/
95
96/*!
97 \variable QRhiD3D12NativeHandles::minimumFeatureLevel
98
99 Specifies the \b minimum feature level passed to
100 \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-d3d12createdevice}{D3D12CreateDevice()}.
101 When not set, \c{D3D_FEATURE_LEVEL_11_0} is used. See
102 \l{https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels}{this
103 page} for details.
104
    Relevant only when QRhi creates the device, ignored when importing a
    device.
107*/
108
109/*!
110 \variable QRhiD3D12NativeHandles::adapterLuidLow
111
112 The low part of the local identifier (LUID) of the DXGI adapter to use.
    Relevant only when QRhi creates the device, ignored when importing a
    device.
115*/
116
117/*!
118 \variable QRhiD3D12NativeHandles::adapterLuidHigh
119
120 The high part of the local identifier (LUID) of the DXGI adapter to use.
    Relevant only when QRhi creates the device, ignored when importing a
    device.
123*/
124
125/*!
126 \variable QRhiD3D12NativeHandles::commandQueue
127
128 When set, must point to a
129 \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12commandqueue}{ID3D12CommandQueue}.
    This allows optionally importing a command queue as well, in addition to
    a device.
132*/
133
134/*!
135 \class QRhiD3D12CommandBufferNativeHandles
136 \inmodule QtGuiPrivate
137 \inheaderfile rhi/qrhi.h
138 \brief Holds the ID3D12GraphicsCommandList1 object that is backing a QRhiCommandBuffer.
139
140 \note The command list object is only guaranteed to be valid, and
141 in recording state, while recording a frame. That is, between a
142 \l{QRhi::beginFrame()}{beginFrame()} - \l{QRhi::endFrame()}{endFrame()} or
143 \l{QRhi::beginOffscreenFrame()}{beginOffscreenFrame()} -
144 \l{QRhi::endOffscreenFrame()}{endOffscreenFrame()} pair.
145
146 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
147 for details.
148 */
149
150/*!
    \variable QRhiD3D12CommandBufferNativeHandles::commandList

    The ID3D12GraphicsCommandList1 object that is backing the
    QRhiCommandBuffer.
*/
153
154// https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels
155static const D3D_FEATURE_LEVEL MIN_FEATURE_LEVEL = D3D_FEATURE_LEVEL_11_0;
156
157QRhiD3D12::QRhiD3D12(QRhiD3D12InitParams *params, QRhiD3D12NativeHandles *importParams)
158{
159 debugLayer = params->enableDebugLayer;
160 if (importParams) {
161 if (importParams->dev) {
162 ID3D12Device *d3d12Device = reinterpret_cast<ID3D12Device *>(importParams->dev);
163 if (SUCCEEDED(d3d12Device->QueryInterface(__uuidof(ID3D12Device2), reinterpret_cast<void **>(&dev)))) {
164 // get rid of the ref added by QueryInterface
165 d3d12Device->Release();
166 importedDevice = true;
167 } else {
168 qWarning("ID3D12Device2 not supported, cannot import device");
169 }
170 }
171 if (importParams->commandQueue) {
172 cmdQueue = reinterpret_cast<ID3D12CommandQueue *>(importParams->commandQueue);
173 importedCommandQueue = true;
174 }
175 minimumFeatureLevel = D3D_FEATURE_LEVEL(importParams->minimumFeatureLevel);
176 adapterLuid.LowPart = importParams->adapterLuidLow;
177 adapterLuid.HighPart = importParams->adapterLuidHigh;
178 }
179}
180
// Rounds v up to the next multiple of byteAlign. The bit masking only gives
// a meaningful result when byteAlign is a power of two.
template <class Int>
inline Int aligned(Int v, Int byteAlign)
{
    const auto mask = byteAlign - 1;
    return (v + mask) & ~mask;
}
186
187static inline UINT calcSubresource(UINT mipSlice, UINT arraySlice, UINT mipLevels)
188{
189 return mipSlice + arraySlice * mipLevels;
190}
191
192static inline QD3D12RenderTargetData *rtData(QRhiRenderTarget *rt)
193{
194 switch (rt->resourceType()) {
195 case QRhiResource::SwapChainRenderTarget:
196 return &QRHI_RES(QD3D12SwapChainRenderTarget, rt)->d;
197 case QRhiResource::TextureRenderTarget:
198 return &QRHI_RES(QD3D12TextureRenderTarget, rt)->d;
199 break;
200 default:
201 break;
202 }
203 Q_UNREACHABLE_RETURN(nullptr);
204}
205
206bool QRhiD3D12::create(QRhi::Flags flags)
207{
208 rhiFlags = flags;
209
210 UINT factoryFlags = 0;
211 if (debugLayer)
212 factoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
213 HRESULT hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory));
214 if (FAILED(hr)) {
215 // retry without debug, if it was requested (to match D3D11 backend behavior)
216 if (debugLayer) {
217 qCDebug(QRHI_LOG_INFO, "Debug layer was requested but is not available. "
218 "Attempting to create DXGIFactory2 without it.");
219 factoryFlags &= ~DXGI_CREATE_FACTORY_DEBUG;
220 hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory));
221 }
222 if (SUCCEEDED(hr)) {
223 debugLayer = false;
224 } else {
225 qWarning("CreateDXGIFactory2() failed to create DXGI factory: %s",
226 qPrintable(QSystemError::windowsComString(hr)));
227 return false;
228 }
229 }
230
231 if (qEnvironmentVariableIsSet("QT_D3D_MAX_FRAME_LATENCY"))
232 maxFrameLatency = UINT(qMax(0, qEnvironmentVariableIntValue("QT_D3D_MAX_FRAME_LATENCY")));
233 if (maxFrameLatency != 0)
234 qCDebug(QRHI_LOG_INFO, "Using frame latency waitable object with max frame latency %u", maxFrameLatency);
235
236 supportsAllowTearing = false;
237 IDXGIFactory5 *factory5 = nullptr;
238 if (SUCCEEDED(dxgiFactory->QueryInterface(__uuidof(IDXGIFactory5), reinterpret_cast<void **>(&factory5)))) {
239 BOOL allowTearing = false;
240 if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &allowTearing, sizeof(allowTearing))))
241 supportsAllowTearing = allowTearing;
242 factory5->Release();
243 }
244
245 if (debugLayer) {
246 ID3D12Debug1 *debug = nullptr;
247 if (SUCCEEDED(D3D12GetDebugInterface(__uuidof(ID3D12Debug1), reinterpret_cast<void **>(&debug)))) {
248 qCDebug(QRHI_LOG_INFO, "Enabling D3D12 debug layer");
249 debug->EnableDebugLayer();
250 debug->Release();
251 }
252 }
253
254 activeAdapter = nullptr;
255
256 if (!importedDevice) {
257 IDXGIAdapter1 *adapter;
258 int requestedAdapterIndex = -1;
259 if (qEnvironmentVariableIsSet("QT_D3D_ADAPTER_INDEX"))
260 requestedAdapterIndex = qEnvironmentVariableIntValue("QT_D3D_ADAPTER_INDEX");
261
262 if (requestedRhiAdapter)
263 adapterLuid = static_cast<QD3D12Adapter *>(requestedRhiAdapter)->luid;
264
265 // importParams or requestedRhiAdapter may specify an adapter by the luid, use that in the absence of an env.var. override.
266 if (requestedAdapterIndex < 0 && (adapterLuid.LowPart || adapterLuid.HighPart)) {
267 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
268 DXGI_ADAPTER_DESC1 desc;
269 adapter->GetDesc1(&desc);
270 adapter->Release();
271 if (desc.AdapterLuid.LowPart == adapterLuid.LowPart
272 && desc.AdapterLuid.HighPart == adapterLuid.HighPart)
273 {
274 requestedAdapterIndex = adapterIndex;
275 break;
276 }
277 }
278 }
279
280 if (requestedAdapterIndex < 0 && flags.testFlag(QRhi::PreferSoftwareRenderer)) {
281 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
282 DXGI_ADAPTER_DESC1 desc;
283 adapter->GetDesc1(&desc);
284 adapter->Release();
285 if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
286 requestedAdapterIndex = adapterIndex;
287 break;
288 }
289 }
290 }
291
292 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
293 DXGI_ADAPTER_DESC1 desc;
294 adapter->GetDesc1(&desc);
295 const QString name = QString::fromUtf16(reinterpret_cast<char16_t *>(desc.Description));
296 qCDebug(QRHI_LOG_INFO, "Adapter %d: '%s' (vendor 0x%X device 0x%X flags 0x%X)",
297 adapterIndex,
298 qPrintable(name),
299 desc.VendorId,
300 desc.DeviceId,
301 desc.Flags);
302 if (!activeAdapter && (requestedAdapterIndex < 0 || requestedAdapterIndex == adapterIndex)) {
303 activeAdapter = adapter;
304 adapterLuid = desc.AdapterLuid;
305 QRhiD3D::fillDriverInfo(&driverInfoStruct, desc);
306 qCDebug(QRHI_LOG_INFO, " using this adapter");
307 } else {
308 adapter->Release();
309 }
310 }
311 if (!activeAdapter) {
312 qWarning("No adapter");
313 return false;
314 }
315
316 if (minimumFeatureLevel == 0)
317 minimumFeatureLevel = MIN_FEATURE_LEVEL;
318
319 hr = D3D12CreateDevice(activeAdapter,
320 minimumFeatureLevel,
321 __uuidof(ID3D12Device2),
322 reinterpret_cast<void **>(&dev));
323 if (FAILED(hr)) {
324 qWarning("Failed to create D3D12 device: %s", qPrintable(QSystemError::windowsComString(hr)));
325 return false;
326 }
327 } else {
328 Q_ASSERT(dev);
329 // cannot just get a IDXGIDevice from the ID3D12Device anymore, look up the adapter instead
330 adapterLuid = dev->GetAdapterLuid();
331 IDXGIAdapter1 *adapter;
332 for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
333 DXGI_ADAPTER_DESC1 desc;
334 adapter->GetDesc1(&desc);
335 if (desc.AdapterLuid.LowPart == adapterLuid.LowPart
336 && desc.AdapterLuid.HighPart == adapterLuid.HighPart)
337 {
338 activeAdapter = adapter;
339 QRhiD3D::fillDriverInfo(&driverInfoStruct, desc);
340 break;
341 } else {
342 adapter->Release();
343 }
344 }
345 if (!activeAdapter) {
346 qWarning("No adapter");
347 return false;
348 }
349 qCDebug(QRHI_LOG_INFO, "Using imported device %p", dev);
350 }
351
352 QDxgiVSyncService::instance()->refAdapter(adapterLuid);
353
354 if (debugLayer) {
355 ID3D12InfoQueue *infoQueue;
356 if (SUCCEEDED(dev->QueryInterface(__uuidof(ID3D12InfoQueue), reinterpret_cast<void **>(&infoQueue)))) {
357 if (qEnvironmentVariableIntValue("QT_D3D_DEBUG_BREAK")) {
358 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true);
359 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true);
360 infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, true);
361 }
362 D3D12_INFO_QUEUE_FILTER filter = {};
363 D3D12_MESSAGE_ID suppressedMessages[2] = {
364 // there is no way of knowing the clear color upfront
365 D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE,
366 // we have no control over viewport and scissor rects
367 D3D12_MESSAGE_ID_DRAW_EMPTY_SCISSOR_RECTANGLE
368 };
369 filter.DenyList.NumIDs = 2;
370 filter.DenyList.pIDList = suppressedMessages;
371 // Setting the filter would enable Info messages (e.g. about
372 // resource creation) which we don't need.
373 D3D12_MESSAGE_SEVERITY infoSev = D3D12_MESSAGE_SEVERITY_INFO;
374 filter.DenyList.NumSeverities = 1;
375 filter.DenyList.pSeverityList = &infoSev;
376 infoQueue->PushStorageFilter(&filter);
377 infoQueue->Release();
378 }
379 }
380
381 if (!importedCommandQueue) {
382 D3D12_COMMAND_QUEUE_DESC queueDesc = {};
383 queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
384 queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
385 hr = dev->CreateCommandQueue(&queueDesc, __uuidof(ID3D12CommandQueue), reinterpret_cast<void **>(&cmdQueue));
386 if (FAILED(hr)) {
387 qWarning("Failed to create command queue: %s", qPrintable(QSystemError::windowsComString(hr)));
388 return false;
389 }
390 }
391
392 hr = dev->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), reinterpret_cast<void **>(&fullFence));
393 if (FAILED(hr)) {
394 qWarning("Failed to create fence: %s", qPrintable(QSystemError::windowsComString(hr)));
395 return false;
396 }
397 fullFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
398 fullFenceCounter = 0;
399
400 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
401 hr = dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT,
402 __uuidof(ID3D12CommandAllocator),
403 reinterpret_cast<void **>(&cmdAllocators[i]));
404 if (FAILED(hr)) {
405 qWarning("Failed to create command allocator: %s", qPrintable(QSystemError::windowsComString(hr)));
406 return false;
407 }
408 }
409
410 if (!vma.create(dev, activeAdapter)) {
411 qWarning("Failed to initialize graphics memory suballocator");
412 return false;
413 }
414
415 if (!rtvPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, "main RTV pool")) {
416 qWarning("Could not create RTV pool");
417 return false;
418 }
419
420 if (!dsvPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, "main DSV pool")) {
421 qWarning("Could not create DSV pool");
422 return false;
423 }
424
425 if (!cbvSrvUavPool.create(dev, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, "main CBV-SRV-UAV pool")) {
426 qWarning("Could not create CBV-SRV-UAV pool");
427 return false;
428 }
429
430 resourcePool.create("main resource pool");
431 pipelinePool.create("main pipeline pool");
432 rootSignaturePool.create("main root signature pool");
433 releaseQueue.create(&resourcePool, &pipelinePool, &rootSignaturePool);
434 barrierGen.create(&resourcePool);
435
436 if (!samplerMgr.create(dev)) {
437 qWarning("Could not create sampler pool and shader-visible sampler heap");
438 return false;
439 }
440
441 if (!mipmapGen.create(this)) {
442 qWarning("Could not initialize mipmap generator");
443 return false;
444 }
445
446 if (!mipmapGen3D.create(this)) {
447 qWarning("Could not initialize 3D texture mipmap generator");
448 return false;
449 }
450
451 const qint32 smallStagingSize = aligned(SMALL_STAGING_AREA_BYTES_PER_FRAME, QD3D12StagingArea::ALIGNMENT);
452 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
453 if (!smallStagingAreas[i].create(this, smallStagingSize, D3D12_HEAP_TYPE_UPLOAD)) {
454 qWarning("Could not create host-visible staging area");
455 return false;
456 }
457 QString decoratedName = QLatin1String("Small staging area buffer/");
458 decoratedName += QString::number(i);
459 smallStagingAreas[i].mem.buffer->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16()));
460 }
461
462 if (!shaderVisibleCbvSrvUavHeap.create(dev,
463 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
464 SHADER_VISIBLE_CBV_SRV_UAV_HEAP_PER_FRAME_START_SIZE))
465 {
466 qWarning("Could not create first shader-visible CBV/SRV/UAV heap");
467 return false;
468 }
469
470 if (flags.testFlag(QRhi::EnableTimestamps)) {
471 static bool wantsStablePowerState = qEnvironmentVariableIntValue("QT_D3D_STABLE_POWER_STATE");
472 //
473 // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-setstablepowerstate
474 //
475 // NB! This is a _global_ setting, affecting other processes (and 3D
476 // APIs such as Vulkan), as long as this application is running. Hence
477 // making it an env.var. for now. Never enable it in production. But
478 // extremely useful for the GPU timings with NVIDIA at least; the
479 // timestamps become stable and smooth, making the number readable and
480 // actually useful e.g. in Quick 3D's DebugView when this is enabled.
481 // (otherwise the number's all over the place)
482 //
483 // See also
484 // https://developer.nvidia.com/blog/advanced-api-performance-setstablepowerstate/
485 // for possible other approaches.
486 //
487 if (wantsStablePowerState)
488 dev->SetStablePowerState(TRUE);
489
490 hr = cmdQueue->GetTimestampFrequency(&timestampTicksPerSecond);
491 if (FAILED(hr)) {
492 qWarning("Failed to query timestamp frequency: %s",
493 qPrintable(QSystemError::windowsComString(hr)));
494 return false;
495 }
496 if (!timestampQueryHeap.create(dev, QD3D12_FRAMES_IN_FLIGHT * 2, D3D12_QUERY_HEAP_TYPE_TIMESTAMP)) {
497 qWarning("Failed to create timestamp query pool");
498 return false;
499 }
500 const quint32 readbackBufSize = QD3D12_FRAMES_IN_FLIGHT * 2 * sizeof(quint64);
501 if (!timestampReadbackArea.create(this, readbackBufSize, D3D12_HEAP_TYPE_READBACK)) {
502 qWarning("Failed to create timestamp readback buffer");
503 return false;
504 }
505 timestampReadbackArea.mem.buffer->SetName(L"Timestamp readback buffer");
506 memset(timestampReadbackArea.mem.p, 0, readbackBufSize);
507 }
508
509 caps = {};
510 D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};
511 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)))) {
512 caps.multiView = options3.ViewInstancingTier != D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
513 // https://microsoft.github.io/DirectX-Specs/d3d/RelaxedCasting.html
514 caps.textureViewFormat = options3.CastingFullyTypedFormatSupported;
515 }
516
517#ifdef QRHI_D3D12_CL5_AVAILABLE
518 D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {};
519 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS6, &options6, sizeof(options6)))) {
520 caps.vrs = options6.VariableShadingRateTier != D3D12_VARIABLE_SHADING_RATE_TIER_NOT_SUPPORTED;
521 caps.vrsMap = options6.VariableShadingRateTier == D3D12_VARIABLE_SHADING_RATE_TIER_2;
522 caps.vrsAdditionalRates = options6.AdditionalShadingRatesSupported;
523 shadingRateImageTileSize = options6.ShadingRateImageTileSize;
524 }
525#else
526 caps.vrs = false;
527 caps.vrsMap = false;
528 caps.vrsAdditionalRates = false;
529#endif
530
531 {
532 D3D12_INDIRECT_ARGUMENT_DESC arg = {};
533 arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW;
534
535 D3D12_COMMAND_SIGNATURE_DESC sigDesc = {};
536 sigDesc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS);
537 sigDesc.NumArgumentDescs = 1;
538 sigDesc.pArgumentDescs = &arg;
539
540 hr = dev->CreateCommandSignature(&sigDesc, nullptr, IID_PPV_ARGS(&drawCommandSignature));
541 if (FAILED(hr)) {
542 qWarning("Failed to create draw command signature: %s", qPrintable(QSystemError::windowsComString(hr)));
543 return false;
544 }
545 }
546
547 {
548 D3D12_INDIRECT_ARGUMENT_DESC arg = {};
549 arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
550
551 D3D12_COMMAND_SIGNATURE_DESC sigDesc = {};
552 sigDesc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
553 sigDesc.NumArgumentDescs = 1;
554 sigDesc.pArgumentDescs = &arg;
555
556 hr = dev->CreateCommandSignature(&sigDesc, nullptr, IID_PPV_ARGS(&drawIndexedCommandSignature));
557 if (FAILED(hr)) {
558 qWarning("Failed to create draw indexed command signature: %s", qPrintable(QSystemError::windowsComString(hr)));
559 return false;
560 }
561 }
562
563 deviceLost = false;
564 offscreenActive = false;
565
566 nativeHandlesStruct.dev = dev;
567 nativeHandlesStruct.minimumFeatureLevel = minimumFeatureLevel;
568 nativeHandlesStruct.adapterLuidLow = adapterLuid.LowPart;
569 nativeHandlesStruct.adapterLuidHigh = adapterLuid.HighPart;
570 nativeHandlesStruct.commandQueue = cmdQueue;
571
572 return true;
573}
574
575void QRhiD3D12::destroy()
576{
577 if (!deviceLost && fullFence && fullFenceEvent)
578 waitGpu();
579
580 releaseQueue.releaseAll();
581
582 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
583 if (offscreenCb[i]) {
584 if (offscreenCb[i]->cmdList)
585 offscreenCb[i]->cmdList->Release();
586 delete offscreenCb[i];
587 offscreenCb[i] = nullptr;
588 }
589 }
590
591 timestampQueryHeap.destroy();
592 timestampReadbackArea.destroy();
593
594 shaderVisibleCbvSrvUavHeap.destroy();
595
596 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i)
597 smallStagingAreas[i].destroy();
598
599 mipmapGen.destroy();
600 mipmapGen3D.destroy();
601 samplerMgr.destroy();
602 resourcePool.destroy();
603 pipelinePool.destroy();
604 rootSignaturePool.destroy();
605 rtvPool.destroy();
606 dsvPool.destroy();
607 cbvSrvUavPool.destroy();
608
609 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
610 if (cmdAllocators[i]) {
611 cmdAllocators[i]->Release();
612 cmdAllocators[i] = nullptr;
613 }
614 }
615
616 if (fullFenceEvent) {
617 CloseHandle(fullFenceEvent);
618 fullFenceEvent = nullptr;
619 }
620
621 if (fullFence) {
622 fullFence->Release();
623 fullFence = nullptr;
624 }
625
626 if (!importedCommandQueue) {
627 if (cmdQueue) {
628 cmdQueue->Release();
629 cmdQueue = nullptr;
630 }
631 }
632
633 vma.destroy();
634
635 if (!importedDevice) {
636 if (dev) {
637 dev->Release();
638 dev = nullptr;
639 }
640 }
641
642 if (dcompDevice) {
643 dcompDevice->Release();
644 dcompDevice = nullptr;
645 }
646
647 if (activeAdapter) {
648 activeAdapter->Release();
649 activeAdapter = nullptr;
650 }
651
652 if (dxgiFactory) {
653 dxgiFactory->Release();
654 dxgiFactory = nullptr;
655 }
656
657 adapterLuid = {};
658 importedDevice = false;
659 importedCommandQueue = false;
660
661 QDxgiVSyncService::instance()->derefAdapter(adapterLuid);
662
663 if (drawCommandSignature) {
664 drawCommandSignature->Release();
665 drawCommandSignature = nullptr;
666 }
667
668 if (drawIndexedCommandSignature) {
669 drawIndexedCommandSignature->Release();
670 drawIndexedCommandSignature = nullptr;
671 }
672}
673
674QRhi::AdapterList QRhiD3D12::enumerateAdaptersBeforeCreate(QRhiNativeHandles *nativeHandles) const
675{
676 LUID requestedLuid = {};
677 if (nativeHandles) {
678 QRhiD3D12NativeHandles *h = static_cast<QRhiD3D12NativeHandles *>(nativeHandles);
679 const LUID adapterLuid = { h->adapterLuidLow, h->adapterLuidHigh };
680 if (adapterLuid.LowPart || adapterLuid.HighPart)
681 requestedLuid = adapterLuid;
682 }
683
684 IDXGIFactory2 *dxgi = nullptr;
685 if (FAILED(CreateDXGIFactory2(0, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgi))))
686 return {};
687
688 QRhi::AdapterList list;
689 IDXGIAdapter1 *adapter;
690 for (int adapterIndex = 0; dxgi->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) {
691 DXGI_ADAPTER_DESC1 desc;
692 adapter->GetDesc1(&desc);
693 adapter->Release();
694 if (requestedLuid.LowPart || requestedLuid.HighPart) {
695 if (desc.AdapterLuid.LowPart != requestedLuid.LowPart
696 || desc.AdapterLuid.HighPart != requestedLuid.HighPart)
697 {
698 continue;
699 }
700 }
701 QD3D12Adapter *a = new QD3D12Adapter;
702 a->luid = desc.AdapterLuid;
703 QRhiD3D::fillDriverInfo(&a->adapterInfo, desc);
704 list.append(a);
705 }
706
707 dxgi->Release();
708 return list;
709}
710
711QRhiDriverInfo QD3D12Adapter::info() const
712{
713 return adapterInfo;
714}
715
716QList<int> QRhiD3D12::supportedSampleCounts() const
717{
718 return { 1, 2, 4, 8 };
719}
720
721QList<QSize> QRhiD3D12::supportedShadingRates(int sampleCount) const
722{
723 QList<QSize> sizes;
724 switch (sampleCount) {
725 case 0:
726 case 1:
727 if (caps.vrsAdditionalRates) {
728 sizes.append(QSize(4, 4));
729 sizes.append(QSize(4, 2));
730 sizes.append(QSize(2, 4));
731 }
732 sizes.append(QSize(2, 2));
733 sizes.append(QSize(2, 1));
734 sizes.append(QSize(1, 2));
735 break;
736 case 2:
737 if (caps.vrsAdditionalRates)
738 sizes.append(QSize(2, 4));
739 sizes.append(QSize(2, 2));
740 sizes.append(QSize(2, 1));
741 sizes.append(QSize(1, 2));
742 break;
743 case 4:
744 sizes.append(QSize(2, 2));
745 sizes.append(QSize(2, 1));
746 sizes.append(QSize(1, 2));
747 break;
748 default:
749 break;
750 }
751 sizes.append(QSize(1, 1));
752 return sizes;
753}
754
755QRhiSwapChain *QRhiD3D12::createSwapChain()
756{
757 return new QD3D12SwapChain(this);
758}
759
760QRhiBuffer *QRhiD3D12::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, quint32 size)
761{
762 return new QD3D12Buffer(this, type, usage, size);
763}
764
765int QRhiD3D12::ubufAlignment() const
766{
767 return D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; // 256
768}
769
770bool QRhiD3D12::isYUpInFramebuffer() const
771{
772 return false;
773}
774
775bool QRhiD3D12::isYUpInNDC() const
776{
777 return true;
778}
779
780bool QRhiD3D12::isClipDepthZeroToOne() const
781{
782 return true;
783}
784
785QMatrix4x4 QRhiD3D12::clipSpaceCorrMatrix() const
786{
787 // Like with Vulkan, but Y is already good.
788
789 static QMatrix4x4 m;
790 if (m.isIdentity()) {
791 // NB the ctor takes row-major
792 m = QMatrix4x4(1.0f, 0.0f, 0.0f, 0.0f,
793 0.0f, 1.0f, 0.0f, 0.0f,
794 0.0f, 0.0f, 0.5f, 0.5f,
795 0.0f, 0.0f, 0.0f, 1.0f);
796 }
797 return m;
798}
799
800bool QRhiD3D12::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture::Flags flags) const
801{
802 Q_UNUSED(flags);
803
804 if (format >= QRhiTexture::ETC2_RGB8 && format <= QRhiTexture::ASTC_12x12)
805 return false;
806
807 return true;
808}
809
810bool QRhiD3D12::isFeatureSupported(QRhi::Feature feature) const
811{
812 switch (feature) {
813 case QRhi::MultisampleTexture:
814 return true;
815 case QRhi::MultisampleRenderBuffer:
816 return true;
817 case QRhi::DebugMarkers:
818#ifdef QRHI_D3D12_HAS_OLD_PIX
819 return true;
820#else
821 return false;
822#endif
823 case QRhi::Timestamps:
824 return true;
825 case QRhi::Instancing:
826 return true;
827 case QRhi::CustomInstanceStepRate:
828 return true;
829 case QRhi::PrimitiveRestart:
830 return true;
831 case QRhi::NonDynamicUniformBuffers:
832 return false;
833 case QRhi::NonFourAlignedEffectiveIndexBufferOffset:
834 return true;
835 case QRhi::NPOTTextureRepeat:
836 return true;
837 case QRhi::RedOrAlpha8IsRed:
838 return true;
839 case QRhi::ElementIndexUint:
840 return true;
841 case QRhi::Compute:
842 return true;
843 case QRhi::WideLines:
844 return false;
845 case QRhi::VertexShaderPointSize:
846 return false;
847 case QRhi::BaseVertex:
848 return true;
849 case QRhi::BaseInstance:
850 return true;
851 case QRhi::TriangleFanTopology:
852 return false;
853 case QRhi::ReadBackNonUniformBuffer:
854 return true;
855 case QRhi::ReadBackNonBaseMipLevel:
856 return true;
857 case QRhi::TexelFetch:
858 return true;
859 case QRhi::RenderToNonBaseMipLevel:
860 return true;
861 case QRhi::IntAttributes:
862 return true;
863 case QRhi::ScreenSpaceDerivatives:
864 return true;
865 case QRhi::ReadBackAnyTextureFormat:
866 return true;
867 case QRhi::PipelineCacheDataLoadSave:
868 return false; // ###
869 case QRhi::ImageDataStride:
870 return true;
871 case QRhi::RenderBufferImport:
872 return false;
873 case QRhi::ThreeDimensionalTextures:
874 return true;
875 case QRhi::RenderTo3DTextureSlice:
876 return true;
877 case QRhi::TextureArrays:
878 return true;
879 case QRhi::Tessellation:
880 return true;
881 case QRhi::GeometryShader:
882 return true;
883 case QRhi::TextureArrayRange:
884 return true;
885 case QRhi::NonFillPolygonMode:
886 return true;
887 case QRhi::OneDimensionalTextures:
888 return true;
889 case QRhi::OneDimensionalTextureMipmaps:
890 return false; // we generate mipmaps ourselves with compute and this is not implemented
891 case QRhi::HalfAttributes:
892 return true;
893 case QRhi::RenderToOneDimensionalTexture:
894 return true;
895 case QRhi::ThreeDimensionalTextureMipmaps:
896 return true;
897 case QRhi::MultiView:
898 return caps.multiView;
899 case QRhi::TextureViewFormat:
900 return caps.textureViewFormat;
901 case QRhi::ResolveDepthStencil:
902 // there is no Multisample Resolve support for depth/stencil formats
903 // https://learn.microsoft.com/en-us/windows/win32/direct3ddxgi/hardware-support-for-direct3d-12-1-formats
904 return false;
905 case QRhi::VariableRateShading:
906 return caps.vrs;
907 case QRhi::VariableRateShadingMap:
908 case QRhi::VariableRateShadingMapWithTexture:
909 return caps.vrsMap;
910 case QRhi::PerRenderTargetBlending:
911 case QRhi::SampleVariables:
912 return true;
913 case QRhi::InstanceIndexIncludesBaseInstance:
914 return false;
915 case QRhi::DepthClamp:
916 return true;
917 case QRhi::DrawIndirect:
918 return drawCommandSignature != nullptr && drawIndexedCommandSignature != nullptr;
919 case QRhi::DrawIndirectMulti:
920 return drawCommandSignature != nullptr && drawIndexedCommandSignature != nullptr;
921 }
922 return false;
923}
924
925int QRhiD3D12::resourceLimit(QRhi::ResourceLimit limit) const
926{
927 switch (limit) {
928 case QRhi::TextureSizeMin:
929 return 1;
930 case QRhi::TextureSizeMax:
931 return 16384;
932 case QRhi::MaxColorAttachments:
933 return 8;
934 case QRhi::FramesInFlight:
935 return QD3D12_FRAMES_IN_FLIGHT;
936 case QRhi::MaxAsyncReadbackFrames:
937 return QD3D12_FRAMES_IN_FLIGHT;
938 case QRhi::MaxThreadGroupsPerDimension:
939 return 65535;
940 case QRhi::MaxThreadsPerThreadGroup:
941 return 1024;
942 case QRhi::MaxThreadGroupX:
943 return 1024;
944 case QRhi::MaxThreadGroupY:
945 return 1024;
946 case QRhi::MaxThreadGroupZ:
947 return 1024;
948 case QRhi::TextureArraySizeMax:
949 return 2048;
950 case QRhi::MaxUniformBufferRange:
951 return 65536;
952 case QRhi::MaxVertexInputs:
953 return 32;
954 case QRhi::MaxVertexOutputs:
955 return 32;
956 case QRhi::ShadingRateImageTileSize:
957 return shadingRateImageTileSize;
958 }
959 return 0;
960}
961
962const QRhiNativeHandles *QRhiD3D12::nativeHandles()
963{
964 return &nativeHandlesStruct;
965}
966
967QRhiDriverInfo QRhiD3D12::driverInfo() const
968{
969 return driverInfoStruct;
970}
971
972QRhiStats QRhiD3D12::statistics()
973{
974 QRhiStats result;
975 result.totalPipelineCreationTime = totalPipelineCreationTime();
976
977 D3D12MA::Budget budgets[2]; // [gpu, system] with discreet GPU or [shared, nothing] with UMA
978 vma.getBudget(&budgets[0], &budgets[1]);
979 for (int i = 0; i < 2; ++i) {
980 const D3D12MA::Statistics &stats(budgets[i].Stats);
981 result.blockCount += stats.BlockCount;
982 result.allocCount += stats.AllocationCount;
983 result.usedBytes += stats.AllocationBytes;
984 result.unusedBytes += stats.BlockBytes - stats.AllocationBytes;
985 result.totalUsageBytes += budgets[i].UsageBytes;
986 }
987
988 return result;
989}
990
991bool QRhiD3D12::makeThreadLocalNativeContextCurrent()
992{
993 // not applicable
994 return false;
995}
996
997void QRhiD3D12::setQueueSubmitParams(QRhiNativeHandles *)
998{
999 // not applicable
1000}
1001
1002void QRhiD3D12::releaseCachedResources()
1003{
1004 shaderBytecodeCache.data.clear();
1005}
1006
1007bool QRhiD3D12::isDeviceLost() const
1008{
1009 return deviceLost;
1010}
1011
1012QByteArray QRhiD3D12::pipelineCacheData()
1013{
1014 return {};
1015}
1016
1017void QRhiD3D12::setPipelineCacheData(const QByteArray &data)
1018{
1019 Q_UNUSED(data);
1020}
1021
1022QRhiRenderBuffer *QRhiD3D12::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize,
1023 int sampleCount, QRhiRenderBuffer::Flags flags,
1024 QRhiTexture::Format backingFormatHint)
1025{
1026 return new QD3D12RenderBuffer(this, type, pixelSize, sampleCount, flags, backingFormatHint);
1027}
1028
1029QRhiTexture *QRhiD3D12::createTexture(QRhiTexture::Format format,
1030 const QSize &pixelSize, int depth, int arraySize,
1031 int sampleCount, QRhiTexture::Flags flags)
1032{
1033 return new QD3D12Texture(this, format, pixelSize, depth, arraySize, sampleCount, flags);
1034}
1035
1036QRhiSampler *QRhiD3D12::createSampler(QRhiSampler::Filter magFilter, QRhiSampler::Filter minFilter,
1037 QRhiSampler::Filter mipmapMode,
1038 QRhiSampler::AddressMode u, QRhiSampler::AddressMode v, QRhiSampler::AddressMode w)
1039{
1040 return new QD3D12Sampler(this, magFilter, minFilter, mipmapMode, u, v, w);
1041}
1042
1043QRhiTextureRenderTarget *QRhiD3D12::createTextureRenderTarget(const QRhiTextureRenderTargetDescription &desc,
1044 QRhiTextureRenderTarget::Flags flags)
1045{
1046 return new QD3D12TextureRenderTarget(this, desc, flags);
1047}
1048
1049QRhiShadingRateMap *QRhiD3D12::createShadingRateMap()
1050{
1051 return new QD3D12ShadingRateMap(this);
1052}
1053
1054QRhiGraphicsPipeline *QRhiD3D12::createGraphicsPipeline()
1055{
1056 return new QD3D12GraphicsPipeline(this);
1057}
1058
1059QRhiComputePipeline *QRhiD3D12::createComputePipeline()
1060{
1061 return new QD3D12ComputePipeline(this);
1062}
1063
1064QRhiShaderResourceBindings *QRhiD3D12::createShaderResourceBindings()
1065{
1066 return new QD3D12ShaderResourceBindings(this);
1067}
1068
1069void QRhiD3D12::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps)
1070{
1071 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1072 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1073 QD3D12GraphicsPipeline *psD = QRHI_RES(QD3D12GraphicsPipeline, ps);
1074 const bool pipelineChanged = cbD->currentGraphicsPipeline != psD || cbD->currentPipelineGeneration != psD->generation;
1075
1076 if (pipelineChanged) {
1077 cbD->currentGraphicsPipeline = psD;
1078 cbD->currentComputePipeline = nullptr;
1079 cbD->currentPipelineGeneration = psD->generation;
1080
1081 if (QD3D12Pipeline *pipeline = pipelinePool.lookupRef(psD->handle)) {
1082 Q_ASSERT(pipeline->type == QD3D12Pipeline::Graphics);
1083 cbD->cmdList->SetPipelineState(pipeline->pso);
1084 if (QD3D12RootSignature *rs = rootSignaturePool.lookupRef(psD->rootSigHandle))
1085 cbD->cmdList->SetGraphicsRootSignature(rs->rootSig);
1086 }
1087
1088 cbD->cmdList->IASetPrimitiveTopology(psD->topology);
1089
1090 if (psD->viewInstanceMask)
1091 cbD->cmdList->SetViewInstanceMask(psD->viewInstanceMask);
1092
1093 if (cbD->hasCustomScissorSet && !psD->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor))
1094 setDefaultScissor(cbD);
1095 }
1096}
1097
1098void QD3D12CommandBuffer::visitUniformBuffer(QD3D12Stage s,
1099 const QRhiShaderResourceBinding::Data::UniformBufferData &d,
1100 int,
1101 int binding,
1102 int dynamicOffsetCount,
1103 const QRhiCommandBuffer::DynamicOffset *dynamicOffsets)
1104{
1105 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf);
1106 quint32 offset = d.offset;
1107 if (d.hasDynamicOffset) {
1108 for (int i = 0; i < dynamicOffsetCount; ++i) {
1109 const QRhiCommandBuffer::DynamicOffset &dynOfs(dynamicOffsets[i]);
1110 if (dynOfs.first == binding) {
1111 Q_ASSERT(aligned(dynOfs.second, 256u) == dynOfs.second);
1112 offset += dynOfs.second;
1113 }
1114 }
1115 }
1116 QRHI_RES_RHI(QRhiD3D12);
1117 visitorData.cbufs[s].append({ bufD->handles[rhiD->currentFrameSlot], offset });
1118}
1119
1120void QD3D12CommandBuffer::visitTexture(QD3D12Stage s,
1121 const QRhiShaderResourceBinding::TextureAndSampler &d,
1122 int)
1123{
1124 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex);
1125 visitorData.srvs[s].append(texD->srv);
1126}
1127
1128void QD3D12CommandBuffer::visitSampler(QD3D12Stage s,
1129 const QRhiShaderResourceBinding::TextureAndSampler &d,
1130 int)
1131{
1132 QD3D12Sampler *samplerD = QRHI_RES(QD3D12Sampler, d.sampler);
1133 visitorData.samplers[s].append(samplerD->lookupOrCreateShaderVisibleDescriptor());
1134}
1135
1136void QD3D12CommandBuffer::visitStorageBuffer(QD3D12Stage s,
1137 const QRhiShaderResourceBinding::Data::StorageBufferData &d,
1138 QD3D12ShaderResourceVisitor::StorageOp,
1139 int)
1140{
1141 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf);
1142 // SPIRV-Cross generated HLSL uses RWByteAddressBuffer
1143 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
1144 uavDesc.Format = DXGI_FORMAT_R32_TYPELESS;
1145 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
1146 uavDesc.Buffer.FirstElement = d.offset / 4;
1147 uavDesc.Buffer.NumElements = aligned(bufD->m_size - d.offset, 4u) / 4;
1148 uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
1149 visitorData.uavs[s].append({ bufD->handles[0], uavDesc });
1150}
1151
1152void QD3D12CommandBuffer::visitStorageImage(QD3D12Stage s,
1153 const QRhiShaderResourceBinding::Data::StorageImageData &d,
1154 QD3D12ShaderResourceVisitor::StorageOp,
1155 int)
1156{
1157 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex);
1158 const bool isCube = texD->m_flags.testFlag(QRhiTexture::CubeMap);
1159 const bool isArray = texD->m_flags.testFlag(QRhiTexture::TextureArray);
1160 const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
1161 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
1162 uavDesc.Format = texD->rtFormat;
1163 if (isCube) {
1164 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
1165 uavDesc.Texture2DArray.MipSlice = UINT(d.level);
1166 uavDesc.Texture2DArray.FirstArraySlice = 0;
1167 uavDesc.Texture2DArray.ArraySize = 6;
1168 } else if (isArray) {
1169 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
1170 uavDesc.Texture2DArray.MipSlice = UINT(d.level);
1171 uavDesc.Texture2DArray.FirstArraySlice = 0;
1172 uavDesc.Texture2DArray.ArraySize = UINT(qMax(0, texD->m_arraySize));
1173 } else if (is3D) {
1174 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
1175 uavDesc.Texture3D.MipSlice = UINT(d.level);
1176 uavDesc.Texture3D.WSize = UINT(-1);
1177 } else {
1178 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
1179 uavDesc.Texture2D.MipSlice = UINT(d.level);
1180 }
1181 visitorData.uavs[s].append({ texD->handle, uavDesc });
1182}
1183
1184void QRhiD3D12::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBindings *srb,
1185 int dynamicOffsetCount,
1186 const QRhiCommandBuffer::DynamicOffset *dynamicOffsets)
1187{
1188 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1189 Q_ASSERT(cbD->recordingPass != QD3D12CommandBuffer::NoPass);
1190 QD3D12GraphicsPipeline *gfxPsD = QRHI_RES(QD3D12GraphicsPipeline, cbD->currentGraphicsPipeline);
1191 QD3D12ComputePipeline *compPsD = QRHI_RES(QD3D12ComputePipeline, cbD->currentComputePipeline);
1192
1193 if (!srb) {
1194 if (gfxPsD)
1195 srb = gfxPsD->m_shaderResourceBindings;
1196 else
1197 srb = compPsD->m_shaderResourceBindings;
1198 }
1199
1200 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, srb);
1201
1202 bool pipelineChanged = false;
1203 if (gfxPsD) {
1204 pipelineChanged = srbD->lastUsedGraphicsPipeline != gfxPsD;
1205 srbD->lastUsedGraphicsPipeline = gfxPsD;
1206 } else {
1207 pipelineChanged = srbD->lastUsedComputePipeline != compPsD;
1208 srbD->lastUsedComputePipeline = compPsD;
1209 }
1210
1211 for (int i = 0, ie = srbD->m_bindings.size(); i != ie; ++i) {
1212 const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings[i]);
1213 switch (b->type) {
1214 case QRhiShaderResourceBinding::UniformBuffer:
1215 {
1216 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, b->u.ubuf.buf);
1217 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer));
1218 Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
1219 sanityCheckResourceOwnership(bufD);
1220 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
1221 }
1222 break;
1223 case QRhiShaderResourceBinding::SampledTexture:
1224 case QRhiShaderResourceBinding::Texture:
1225 case QRhiShaderResourceBinding::Sampler:
1226 {
1227 const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData *data = &b->u.stex;
1228 for (int elem = 0; elem < data->count; ++elem) {
1229 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, data->texSamplers[elem].tex);
1230 QD3D12Sampler *samplerD = QRHI_RES(QD3D12Sampler, data->texSamplers[elem].sampler);
1231 // We use the same code path for both combined and separate
1232 // images and samplers, so tex or sampler (but not both) can be
1233 // null here.
1234 Q_ASSERT(texD || samplerD);
1235 sanityCheckResourceOwnership(texD);
1236 sanityCheckResourceOwnership(samplerD);
1237 if (texD) {
1238 UINT state = 0;
1239 if (b->stage == QRhiShaderResourceBinding::FragmentStage) {
1240 state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
1241 } else if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
1242 state = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
1243 } else {
1244 state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
1245 }
1246 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATES(state));
1247 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1248 }
1249 }
1250 }
1251 break;
1252 case QRhiShaderResourceBinding::ImageLoad:
1253 case QRhiShaderResourceBinding::ImageStore:
1254 case QRhiShaderResourceBinding::ImageLoadStore:
1255 {
1256 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, b->u.simage.tex);
1257 sanityCheckResourceOwnership(texD);
1258 if (QD3D12Resource *res = resourcePool.lookupRef(texD->handle)) {
1259 if (res->uavUsage) {
1260 if (res->uavUsage & QD3D12Resource::UavUsageWrite) {
1261 // RaW or WaW
1262 barrierGen.enqueueUavBarrier(cbD, texD->handle);
1263 } else {
1264 if (b->type == QRhiShaderResourceBinding::ImageStore
1265 || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1266 {
1267 // WaR or WaW
1268 barrierGen.enqueueUavBarrier(cbD, texD->handle);
1269 }
1270 }
1271 }
1272 res->uavUsage = 0;
1273 if (b->type == QRhiShaderResourceBinding::ImageLoad || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1274 res->uavUsage |= QD3D12Resource::UavUsageRead;
1275 if (b->type == QRhiShaderResourceBinding::ImageStore || b->type == QRhiShaderResourceBinding::ImageLoadStore)
1276 res->uavUsage |= QD3D12Resource::UavUsageWrite;
1277 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1278 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1279 }
1280 }
1281 break;
1282 case QRhiShaderResourceBinding::BufferLoad:
1283 case QRhiShaderResourceBinding::BufferStore:
1284 case QRhiShaderResourceBinding::BufferLoadStore:
1285 {
1286 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, b->u.sbuf.buf);
1287 sanityCheckResourceOwnership(bufD);
1288 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
1289 Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
1290 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
1291 if (res->uavUsage) {
1292 if (res->uavUsage & QD3D12Resource::UavUsageWrite) {
1293 // RaW or WaW
1294 barrierGen.enqueueUavBarrier(cbD, bufD->handles[0]);
1295 } else {
1296 if (b->type == QRhiShaderResourceBinding::BufferStore
1297 || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1298 {
1299 // WaR or WaW
1300 barrierGen.enqueueUavBarrier(cbD, bufD->handles[0]);
1301 }
1302 }
1303 }
1304 res->uavUsage = 0;
1305 if (b->type == QRhiShaderResourceBinding::BufferLoad || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1306 res->uavUsage |= QD3D12Resource::UavUsageRead;
1307 if (b->type == QRhiShaderResourceBinding::BufferStore || b->type == QRhiShaderResourceBinding::BufferLoadStore)
1308 res->uavUsage |= QD3D12Resource::UavUsageWrite;
1309 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
1310 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1311 }
1312 }
1313 break;
1314 }
1315 }
1316
1317 const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb);
1318 const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation;
1319
1320 if (pipelineChanged || srbChanged || srbRebuilt || srbD->hasDynamicOffset) {
1321 const QD3D12ShaderStageData *stageData = gfxPsD ? gfxPsD->stageData.data() : &compPsD->stageData;
1322
1323 // The order of root parameters must match
1324 // QD3D12ShaderResourceBindings::createRootSignature(), meaning the
1325 // logic below must mirror that function (uniform buffers first etc.)
1326
1327 QD3D12ShaderResourceVisitor visitor(srbD, stageData, gfxPsD ? 5 : 1);
1328
1329 QD3D12CommandBuffer::VisitorData &visitorData(cbD->visitorData);
1330 visitorData = {};
1331
1332 using namespace std::placeholders;
1333 visitor.uniformBuffer = std::bind(&QD3D12CommandBuffer::visitUniformBuffer, cbD, _1, _2, _3, _4, dynamicOffsetCount, dynamicOffsets);
1334 visitor.texture = std::bind(&QD3D12CommandBuffer::visitTexture, cbD, _1, _2, _3);
1335 visitor.sampler = std::bind(&QD3D12CommandBuffer::visitSampler, cbD, _1, _2, _3);
1336 visitor.storageBuffer = std::bind(&QD3D12CommandBuffer::visitStorageBuffer, cbD, _1, _2, _3, _4);
1337 visitor.storageImage = std::bind(&QD3D12CommandBuffer::visitStorageImage, cbD, _1, _2, _3, _4);
1338
1339 visitor.visit();
1340
1341 quint32 cbvSrvUavCount = 0;
1342 for (int s = 0; s < 6; ++s) {
1343 // CBs use root constant buffer views, no need to count them here
1344 cbvSrvUavCount += visitorData.srvs[s].count();
1345 cbvSrvUavCount += visitorData.uavs[s].count();
1346 }
1347
1348 bool gotNewHeap = false;
1349 if (!ensureShaderVisibleDescriptorHeapCapacity(&shaderVisibleCbvSrvUavHeap,
1350 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
1351 currentFrameSlot,
1352 cbvSrvUavCount,
1353 &gotNewHeap))
1354 {
1355 return;
1356 }
1357 if (gotNewHeap) {
1358 qCDebug(QRHI_LOG_INFO, "Created new shader-visible CBV/SRV/UAV descriptor heap,"
1359 " per-frame slice size is now %u,"
1360 " if this happens frequently then that's not great.",
1361 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[0].capacity);
1362 bindShaderVisibleHeaps(cbD);
1363 }
1364
1365 int rootParamIndex = 0;
1366 for (int s = 0; s < 6; ++s) {
1367 if (!visitorData.cbufs[s].isEmpty()) {
1368 for (int i = 0, count = visitorData.cbufs[s].count(); i < count; ++i) {
1369 const auto &cbuf(visitorData.cbufs[s][i]);
1370 if (QD3D12Resource *res = resourcePool.lookupRef(cbuf.first)) {
1371 quint32 offset = cbuf.second;
1372 D3D12_GPU_VIRTUAL_ADDRESS gpuAddr = res->resource->GetGPUVirtualAddress() + offset;
1373 if (cbD->currentGraphicsPipeline)
1374 cbD->cmdList->SetGraphicsRootConstantBufferView(rootParamIndex, gpuAddr);
1375 else
1376 cbD->cmdList->SetComputeRootConstantBufferView(rootParamIndex, gpuAddr);
1377 }
1378 rootParamIndex += 1;
1379 }
1380 }
1381 }
1382 for (int s = 0; s < 6; ++s) {
1383 if (!visitorData.srvs[s].isEmpty()) {
1384 QD3D12DescriptorHeap &gpuSrvHeap(shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot]);
1385 QD3D12Descriptor startDesc = gpuSrvHeap.get(visitorData.srvs[s].count());
1386 for (int i = 0, count = visitorData.srvs[s].count(); i < count; ++i) {
1387 const auto &srv(visitorData.srvs[s][i]);
1388 dev->CopyDescriptorsSimple(1, gpuSrvHeap.incremented(startDesc, i).cpuHandle, srv.cpuHandle,
1389 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
1390 }
1391
1392 if (cbD->currentGraphicsPipeline)
1393 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1394 else if (cbD->currentComputePipeline)
1395 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1396
1397 rootParamIndex += 1;
1398 }
1399 }
1400 for (int s = 0; s < 6; ++s) {
1401 // Samplers are one parameter / descriptor table each, and the
1402 // descriptor is from the shader visible sampler heap already.
1403 for (const QD3D12Descriptor &samplerDescriptor : visitorData.samplers[s]) {
1404 if (cbD->currentGraphicsPipeline)
1405 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, samplerDescriptor.gpuHandle);
1406 else if (cbD->currentComputePipeline)
1407 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, samplerDescriptor.gpuHandle);
1408
1409 rootParamIndex += 1;
1410 }
1411 }
1412 for (int s = 0; s < 6; ++s) {
1413 if (!visitorData.uavs[s].isEmpty()) {
1414 QD3D12DescriptorHeap &gpuUavHeap(shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot]);
1415 QD3D12Descriptor startDesc = gpuUavHeap.get(visitorData.uavs[s].count());
1416 for (int i = 0, count = visitorData.uavs[s].count(); i < count; ++i) {
1417 const auto &uav(visitorData.uavs[s][i]);
1418 if (QD3D12Resource *res = resourcePool.lookupRef(uav.first)) {
1419 dev->CreateUnorderedAccessView(res->resource, nullptr, &uav.second,
1420 gpuUavHeap.incremented(startDesc, i).cpuHandle);
1421 } else {
1422 dev->CreateUnorderedAccessView(nullptr, nullptr, nullptr,
1423 gpuUavHeap.incremented(startDesc, i).cpuHandle);
1424 }
1425 }
1426
1427 if (cbD->currentGraphicsPipeline)
1428 cbD->cmdList->SetGraphicsRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1429 else if (cbD->currentComputePipeline)
1430 cbD->cmdList->SetComputeRootDescriptorTable(rootParamIndex, startDesc.gpuHandle);
1431
1432 rootParamIndex += 1;
1433 }
1434 }
1435
1436 if (gfxPsD) {
1437 cbD->currentGraphicsSrb = srb;
1438 cbD->currentComputeSrb = nullptr;
1439 } else {
1440 cbD->currentGraphicsSrb = nullptr;
1441 cbD->currentComputeSrb = srb;
1442 }
1443 cbD->currentSrbGeneration = srbD->generation;
1444 }
1445}
1446
1447void QRhiD3D12::setVertexInput(QRhiCommandBuffer *cb,
1448 int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings,
1449 QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat)
1450{
1451 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1452 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1453
1454 bool needsBindVBuf = false;
1455 for (int i = 0; i < bindingCount; ++i) {
1456 const int inputSlot = startBinding + i;
1457 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, bindings[i].first);
1458 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::VertexBuffer));
1459 const bool isDynamic = bufD->m_type == QRhiBuffer::Dynamic;
1460 if (isDynamic)
1461 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
1462
1463 if (cbD->currentVertexBuffers[inputSlot] != bufD->handles[isDynamic ? currentFrameSlot : 0]
1464 || cbD->currentVertexOffsets[inputSlot] != bindings[i].second)
1465 {
1466 needsBindVBuf = true;
1467 cbD->currentVertexBuffers[inputSlot] = bufD->handles[isDynamic ? currentFrameSlot : 0];
1468 cbD->currentVertexOffsets[inputSlot] = bindings[i].second;
1469 }
1470 }
1471
1472 if (needsBindVBuf) {
1473 QVarLengthArray<D3D12_VERTEX_BUFFER_VIEW, 4> vbv;
1474 vbv.reserve(bindingCount);
1475
1476 QD3D12GraphicsPipeline *psD = cbD->currentGraphicsPipeline;
1477 const QRhiVertexInputLayout &inputLayout(psD->m_vertexInputLayout);
1478 const int inputBindingCount = inputLayout.cendBindings() - inputLayout.cbeginBindings();
1479
1480 for (int i = 0, ie = qMin(bindingCount, inputBindingCount); i != ie; ++i) {
1481 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, bindings[i].first);
1482 const QD3D12ObjectHandle handle = bufD->handles[bufD->m_type == QRhiBuffer::Dynamic ? currentFrameSlot : 0];
1483 const quint32 offset = bindings[i].second;
1484 const quint32 stride = inputLayout.bindingAt(i)->stride();
1485
1486 if (bufD->m_type != QRhiBuffer::Dynamic) {
1487 barrierGen.addTransitionBarrier(handle, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
1488 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1489 }
1490
1491 if (QD3D12Resource *res = resourcePool.lookupRef(handle)) {
1492 vbv.append({
1493 res->resource->GetGPUVirtualAddress() + offset,
1494 UINT(res->desc.Width - offset),
1495 stride
1496 });
1497 }
1498 }
1499
1500 cbD->cmdList->IASetVertexBuffers(UINT(startBinding), vbv.count(), vbv.constData());
1501 }
1502
1503 if (indexBuf) {
1504 QD3D12Buffer *ibufD = QRHI_RES(QD3D12Buffer, indexBuf);
1505 Q_ASSERT(ibufD->m_usage.testFlag(QRhiBuffer::IndexBuffer));
1506 const bool isDynamic = ibufD->m_type == QRhiBuffer::Dynamic;
1507 if (isDynamic)
1508 ibufD->executeHostWritesForFrameSlot(currentFrameSlot);
1509
1510 const DXGI_FORMAT dxgiFormat = indexFormat == QRhiCommandBuffer::IndexUInt16 ? DXGI_FORMAT_R16_UINT
1511 : DXGI_FORMAT_R32_UINT;
1512 if (cbD->currentIndexBuffer != ibufD->handles[isDynamic ? currentFrameSlot : 0]
1513 || cbD->currentIndexOffset != indexOffset
1514 || cbD->currentIndexFormat != dxgiFormat)
1515 {
1516 cbD->currentIndexBuffer = ibufD->handles[isDynamic ? currentFrameSlot : 0];
1517 cbD->currentIndexOffset = indexOffset;
1518 cbD->currentIndexFormat = dxgiFormat;
1519
1520 if (ibufD->m_type != QRhiBuffer::Dynamic) {
1521 barrierGen.addTransitionBarrier(cbD->currentIndexBuffer, D3D12_RESOURCE_STATE_INDEX_BUFFER);
1522 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1523 }
1524
1525 if (QD3D12Resource *res = resourcePool.lookupRef(cbD->currentIndexBuffer)) {
1526 const D3D12_INDEX_BUFFER_VIEW ibv = {
1527 res->resource->GetGPUVirtualAddress() + indexOffset,
1528 UINT(res->desc.Width - indexOffset),
1529 dxgiFormat
1530 };
1531 cbD->cmdList->IASetIndexBuffer(&ibv);
1532 }
1533 }
1534 }
1535}
1536
1537void QRhiD3D12::setDefaultScissor(QD3D12CommandBuffer *cbD)
1538{
1539 cbD->hasCustomScissorSet = false;
1540
1541 const QSize outputSize = cbD->currentTarget->pixelSize();
1542 std::array<float, 4> vp = cbD->currentViewport.viewport();
1543 float x = 0, y = 0, w = 0, h = 0;
1544
1545 if (qFuzzyIsNull(vp[2]) && qFuzzyIsNull(vp[3])) {
1546 x = 0;
1547 y = 0;
1548 w = outputSize.width();
1549 h = outputSize.height();
1550 } else {
1551 // x,y is top-left in D3D12_RECT but bottom-left in QRhiScissor
1552 qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, vp, &x, &y, &w, &h);
1553 }
1554
1555 D3D12_RECT r;
1556 r.left = x;
1557 r.top = y;
1558 // right and bottom are exclusive
1559 r.right = x + w;
1560 r.bottom = y + h;
1561 cbD->cmdList->RSSetScissorRects(1, &r);
1562}
1563
1564void QRhiD3D12::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
1565{
1566 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1567 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1568 Q_ASSERT(cbD->currentTarget);
1569 const QSize outputSize = cbD->currentTarget->pixelSize();
1570
1571 // D3D expects top-left, QRhiViewport is bottom-left
1572 float x, y, w, h;
1573 if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h))
1574 return;
1575
1576 D3D12_VIEWPORT v;
1577 v.TopLeftX = x;
1578 v.TopLeftY = y;
1579 v.Width = w;
1580 v.Height = h;
1581 v.MinDepth = viewport.minDepth();
1582 v.MaxDepth = viewport.maxDepth();
1583 cbD->cmdList->RSSetViewports(1, &v);
1584
1585 cbD->currentViewport = viewport;
1586 if (cbD->currentGraphicsPipeline
1587 && !cbD->currentGraphicsPipeline->flags().testFlag(QRhiGraphicsPipeline::UsesScissor))
1588 {
1589 setDefaultScissor(cbD);
1590 }
1591}
1592
1593void QRhiD3D12::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor)
1594{
1595 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1596 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1597 Q_ASSERT(cbD->currentTarget);
1598 const QSize outputSize = cbD->currentTarget->pixelSize();
1599
1600 // D3D expects top-left, QRhiScissor is bottom-left
1601 int x, y, w, h;
1602 if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h))
1603 return;
1604
1605 D3D12_RECT r;
1606 r.left = x;
1607 r.top = y;
1608 // right and bottom are exclusive
1609 r.right = x + w;
1610 r.bottom = y + h;
1611 cbD->cmdList->RSSetScissorRects(1, &r);
1612
1613 cbD->hasCustomScissorSet = true;
1614}
1615
1616void QRhiD3D12::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c)
1617{
1618 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1619 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1620 float v[4] = { c.redF(), c.greenF(), c.blueF(), c.alphaF() };
1621 cbD->cmdList->OMSetBlendFactor(v);
1622}
1623
1624void QRhiD3D12::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue)
1625{
1626 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1627 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1628 cbD->cmdList->OMSetStencilRef(refValue);
1629}
1630
1631static inline D3D12_SHADING_RATE toD3DShadingRate(const QSize &coarsePixelSize)
1632{
1633 if (coarsePixelSize == QSize(1, 2))
1634 return D3D12_SHADING_RATE_1X2;
1635 if (coarsePixelSize == QSize(2, 1))
1636 return D3D12_SHADING_RATE_2X1;
1637 if (coarsePixelSize == QSize(2, 2))
1638 return D3D12_SHADING_RATE_2X2;
1639 if (coarsePixelSize == QSize(2, 4))
1640 return D3D12_SHADING_RATE_2X4;
1641 if (coarsePixelSize == QSize(4, 2))
1642 return D3D12_SHADING_RATE_4X2;
1643 if (coarsePixelSize == QSize(4, 4))
1644 return D3D12_SHADING_RATE_4X4;
1645 return D3D12_SHADING_RATE_1X1;
1646}
1647
1648void QRhiD3D12::setShadingRate(QRhiCommandBuffer *cb, const QSize &coarsePixelSize)
1649{
1650 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1651 cbD->hasShadingRateSet = false;
1652
1653#ifdef QRHI_D3D12_CL5_AVAILABLE
1654 if (!caps.vrs)
1655 return;
1656
1657 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1658 const D3D12_SHADING_RATE_COMBINER combiners[] = { D3D12_SHADING_RATE_COMBINER_MAX, D3D12_SHADING_RATE_COMBINER_MAX };
1659 cbD->cmdList->RSSetShadingRate(toD3DShadingRate(coarsePixelSize), combiners);
1660 if (coarsePixelSize.width() != 1 || coarsePixelSize.height() != 1)
1661 cbD->hasShadingRateSet = true;
1662#else
1663 Q_UNUSED(cb);
1664 Q_UNUSED(coarsePixelSize);
1665 qWarning("Attempted to set ShadingRate without building Qt against a sufficiently new Windows SDK and d3d12.h. This cannot work.");
1666#endif
1667}
1668
1669void QRhiD3D12::draw(QRhiCommandBuffer *cb, quint32 vertexCount,
1670 quint32 instanceCount, quint32 firstVertex, quint32 firstInstance)
1671{
1672 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1673 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1674 cbD->cmdList->DrawInstanced(vertexCount, instanceCount, firstVertex, firstInstance);
1675}
1676
1677void QRhiD3D12::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount,
1678 quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance)
1679{
1680 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1681 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1682 cbD->cmdList->DrawIndexedInstanced(indexCount, instanceCount,
1683 firstIndex, vertexOffset,
1684 firstInstance);
1685}
1686
1687void QRhiD3D12::drawIndirect(QRhiCommandBuffer *cb, QRhiBuffer *indirectBuffer,
1688 quint32 indirectBufferOffset, quint32 drawCount, quint32 stride)
1689{
1690 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1691 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1692
1693 QD3D12Buffer *indirectBufferD = QRHI_RES(QD3D12Buffer, indirectBuffer);
1694 const bool isDynamic = indirectBufferD->m_type == QRhiBuffer::Dynamic;
1695 const QD3D12ObjectHandle indirectBufferHandle = indirectBufferD->handles[isDynamic ? currentFrameSlot : 0];
1696 if (isDynamic) {
1697 indirectBufferD->executeHostWritesForFrameSlot(currentFrameSlot);
1698 } else {
1699 barrierGen.addTransitionBarrier(indirectBufferHandle, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
1700 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1701 }
1702 QD3D12Resource *indirectRes = resourcePool.lookupRef(indirectBufferHandle);
1703 if (!indirectRes)
1704 return;
1705 ID3D12Resource *indirectBufferRes = indirectRes->resource;
1706
1707 const bool canUseMulti = (stride == sizeof(QRhiIndirectDrawCommand) && drawCommandSignature);
1708
1709 if (canUseMulti && drawCount > 1) {
1710 cbD->cmdList->ExecuteIndirect(drawCommandSignature, drawCount,
1711 indirectBufferRes, indirectBufferOffset,
1712 nullptr, 0);
1713 } else {
1714 UINT offset = indirectBufferOffset;
1715 for (quint32 i = 0; i < drawCount; ++i) {
1716 cbD->cmdList->ExecuteIndirect(drawCommandSignature, 1,
1717 indirectBufferRes, offset,
1718 nullptr, 0);
1719 offset += stride;
1720 }
1721 }
1722}
1723
1724void QRhiD3D12::drawIndexedIndirect(QRhiCommandBuffer *cb, QRhiBuffer *indirectBuffer,
1725 quint32 indirectBufferOffset, quint32 drawCount, quint32 stride)
1726{
1727 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1728 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
1729
1730 QD3D12Buffer *indirectBufferD = QRHI_RES(QD3D12Buffer, indirectBuffer);
1731 const bool isDynamic = indirectBufferD->m_type == QRhiBuffer::Dynamic;
1732 const QD3D12ObjectHandle indirectBufferHandle = indirectBufferD->handles[isDynamic ? currentFrameSlot : 0];
1733 if (isDynamic) {
1734 indirectBufferD->executeHostWritesForFrameSlot(currentFrameSlot);
1735 } else {
1736 barrierGen.addTransitionBarrier(indirectBufferHandle, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
1737 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1738 }
1739 QD3D12Resource *indirectRes = resourcePool.lookupRef(indirectBufferHandle);
1740 if (!indirectRes)
1741 return;
1742 ID3D12Resource *indirectBufferRes = indirectRes->resource;
1743
1744 const bool canUseMulti = (stride == sizeof(QRhiIndexedIndirectDrawCommand) && drawIndexedCommandSignature);
1745
1746 if (canUseMulti && drawCount > 1) {
1747 cbD->cmdList->ExecuteIndirect(drawIndexedCommandSignature, drawCount,
1748 indirectBufferRes, indirectBufferOffset,
1749 nullptr, 0);
1750 } else {
1751 UINT offset = indirectBufferOffset;
1752 for (quint32 i = 0; i < drawCount; ++i) {
1753 cbD->cmdList->ExecuteIndirect(drawIndexedCommandSignature, 1,
1754 indirectBufferRes, offset,
1755 nullptr, 0);
1756 offset += stride;
1757 }
1758 }
1759}
1760
1761void QRhiD3D12::debugMarkBegin(QRhiCommandBuffer *cb, const QByteArray &name)
1762{
1763 if (!debugMarkers)
1764 return;
1765
1766 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1767#ifdef QRHI_D3D12_HAS_OLD_PIX
1768 PIXBeginEvent(cbD->cmdList, PIX_COLOR_DEFAULT, reinterpret_cast<LPCWSTR>(QString::fromLatin1(name).utf16()));
1769#else
1770 Q_UNUSED(cbD);
1771 Q_UNUSED(name);
1772#endif
1773}
1774
1775void QRhiD3D12::debugMarkEnd(QRhiCommandBuffer *cb)
1776{
1777 if (!debugMarkers)
1778 return;
1779
1780 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1781#ifdef QRHI_D3D12_HAS_OLD_PIX
1782 PIXEndEvent(cbD->cmdList);
1783#else
1784 Q_UNUSED(cbD);
1785#endif
1786}
1787
1788void QRhiD3D12::debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg)
1789{
1790 if (!debugMarkers)
1791 return;
1792
1793 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1794#ifdef QRHI_D3D12_HAS_OLD_PIX
1795 PIXSetMarker(cbD->cmdList, PIX_COLOR_DEFAULT, reinterpret_cast<LPCWSTR>(QString::fromLatin1(msg).utf16()));
1796#else
1797 Q_UNUSED(cbD);
1798 Q_UNUSED(msg);
1799#endif
1800}
1801
1802const QRhiNativeHandles *QRhiD3D12::nativeHandles(QRhiCommandBuffer *cb)
1803{
1804 return QRHI_RES(QD3D12CommandBuffer, cb)->nativeHandles();
1805}
1806
1807void QRhiD3D12::beginExternal(QRhiCommandBuffer *cb)
1808{
1809 Q_UNUSED(cb);
1810}
1811
1812void QRhiD3D12::endExternal(QRhiCommandBuffer *cb)
1813{
1814 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1815 cbD->resetPerPassState();
1816 bindShaderVisibleHeaps(cbD);
1817 if (cbD->currentTarget) { // could be compute, no rendertarget then
1818 QD3D12RenderTargetData *rtD = rtData(cbD->currentTarget);
1819 cbD->cmdList->OMSetRenderTargets(UINT(rtD->colorAttCount),
1820 rtD->rtv,
1821 TRUE,
1822 rtD->dsAttCount ? &rtD->dsv : nullptr);
1823 }
1824}
1825
1826double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb)
1827{
1828 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
1829 return cbD->lastGpuTime;
1830}
1831
1832static void calculateGpuTime(QD3D12CommandBuffer *cbD,
1833 int timestampPairStartIndex,
1834 const quint8 *readbackBufPtr,
1835 quint64 timestampTicksPerSecond)
1836{
1837 const size_t byteOffset = timestampPairStartIndex * sizeof(quint64);
1838 const quint64 *p = reinterpret_cast<const quint64 *>(readbackBufPtr + byteOffset);
1839 const quint64 startTime = *p++;
1840 const quint64 endTime = *p;
1841 if (startTime < endTime) {
1842 const quint64 ticks = endTime - startTime;
1843 const double timeSec = ticks / double(timestampTicksPerSecond);
1844 cbD->lastGpuTime = timeSec;
1845 }
1846}
1847
1848QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
1849{
1850 Q_UNUSED(flags);
1851
1852 QD3D12SwapChain *swapChainD = QRHI_RES(QD3D12SwapChain, swapChain);
1853 currentSwapChain = swapChainD;
1854 currentFrameSlot = swapChainD->currentFrameSlot;
1855 QD3D12SwapChain::FrameResources &fr(swapChainD->frameRes[currentFrameSlot]);
1856
1857 // We could do smarter things but mirror the Vulkan backend for now: Make
1858 // sure the previous commands for this same frame slot have finished. Do
1859 // this also for any other swapchain's commands with the same frame slot.
1860 // While this reduces concurrency in render-to-swapchain-A,
1861 // render-to-swapchain-B, repeat kind of scenarios, it keeps resource usage
1862 // safe: swapchain A starting its frame 0, followed by swapchain B starting
1863 // its own frame 0 will make B wait for A's frame 0 commands. If a resource
1864 // is written in B's frame or when B checks for pending resource releases,
1865 // that won't mess up A's in-flight commands (as they are guaranteed not to
1866 // be in flight anymore). With Qt Quick this situation cannot happen anyway
1867 // by design (one QRhi per window).
1868 for (QD3D12SwapChain *sc : std::as_const(swapchains))
1869 sc->waitCommandCompletionForFrameSlot(currentFrameSlot); // note: swapChainD->currentFrameSlot, not sc's
1870
1871 if (swapChainD->frameLatencyWaitableObject) {
1872 // only wait when endFrame() called Present(), otherwise this would become a 1 sec timeout
1873 if (swapChainD->lastFrameLatencyWaitSlot != currentFrameSlot) {
1874 WaitForSingleObjectEx(swapChainD->frameLatencyWaitableObject, 1000, true);
1875 swapChainD->lastFrameLatencyWaitSlot = currentFrameSlot;
1876 }
1877 }
1878
1879 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
1880 if (FAILED(hr)) {
1881 qWarning("Failed to reset command allocator: %s",
1882 qPrintable(QSystemError::windowsComString(hr)));
1883 return QRhi::FrameOpError;
1884 }
1885
1886 if (!startCommandListForCurrentFrameSlot(&fr.cmdList))
1887 return QRhi::FrameOpError;
1888
1889 QD3D12CommandBuffer *cbD = &swapChainD->cbWrapper;
1890 cbD->cmdList = fr.cmdList;
1891
1892 swapChainD->rtWrapper.d.rtv[0] = swapChainD->sampleDesc.Count > 1
1893 ? swapChainD->msaaRtvs[swapChainD->currentBackBufferIndex].cpuHandle
1894 : swapChainD->rtvs[swapChainD->currentBackBufferIndex].cpuHandle;
1895
1896 swapChainD->rtWrapper.d.dsv = swapChainD->ds ? swapChainD->ds->dsv.cpuHandle
1897 : D3D12_CPU_DESCRIPTOR_HANDLE { 0 };
1898
1899 if (swapChainD->stereo) {
1900 swapChainD->rtWrapperRight.d.rtv[0] = swapChainD->sampleDesc.Count > 1
1901 ? swapChainD->msaaRtvs[swapChainD->currentBackBufferIndex].cpuHandle
1902 : swapChainD->rtvsRight[swapChainD->currentBackBufferIndex].cpuHandle;
1903
1904 swapChainD->rtWrapperRight.d.dsv =
1905 swapChainD->ds ? swapChainD->ds->dsv.cpuHandle : D3D12_CPU_DESCRIPTOR_HANDLE{ 0 };
1906 }
1907
1908
1909 // Time to release things that are marked for currentFrameSlot since due to
1910 // the wait above we know that the previous commands on the GPU for this
1911 // slot must have finished already.
1912 releaseQueue.executeDeferredReleases(currentFrameSlot);
1913
1914 // Full reset of the command buffer data.
1915 cbD->resetState();
1916
1917 // Move the head back to zero for the per-frame shader-visible descriptor heap work areas.
1918 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
1919 // Same for the small staging area.
1920 smallStagingAreas[currentFrameSlot].head = 0;
1921
1922 bindShaderVisibleHeaps(cbD);
1923
1924 finishActiveReadbacks(); // last, in case the readback-completed callback issues rhi calls
1925
1926 if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
1927 // Read the timestamps for the previous frame for this slot. (the
1928 // ResolveQuery() should have completed by now due to the wait above)
1929 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
1930 calculateGpuTime(cbD,
1931 timestampPairStartIndex,
1932 timestampReadbackArea.mem.p,
1933 timestampTicksPerSecond);
1934 // Write the start timestamp for this frame for this slot.
1935 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
1936 D3D12_QUERY_TYPE_TIMESTAMP,
1937 timestampPairStartIndex);
1938 }
1939
1940 QDxgiVSyncService::instance()->beginFrame(adapterLuid);
1941
1942 return QRhi::FrameOpSuccess;
1943}
1944
1945QRhi::FrameOpResult QRhiD3D12::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags)
1946{
1947 QD3D12SwapChain *swapChainD = QRHI_RES(QD3D12SwapChain, swapChain);
1948 Q_ASSERT(currentSwapChain == swapChainD);
1949 QD3D12CommandBuffer *cbD = &swapChainD->cbWrapper;
1950
1951 QD3D12ObjectHandle backBufferResourceHandle = swapChainD->colorBuffers[swapChainD->currentBackBufferIndex];
1952 if (swapChainD->sampleDesc.Count > 1) {
1953 QD3D12ObjectHandle msaaBackBufferResourceHandle = swapChainD->msaaBuffers[swapChainD->currentBackBufferIndex];
1954 barrierGen.addTransitionBarrier(msaaBackBufferResourceHandle, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
1955 barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_RESOLVE_DEST);
1956 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1957 const QD3D12Resource *src = resourcePool.lookupRef(msaaBackBufferResourceHandle);
1958 const QD3D12Resource *dst = resourcePool.lookupRef(backBufferResourceHandle);
1959 if (src && dst)
1960 cbD->cmdList->ResolveSubresource(dst->resource, 0, src->resource, 0, swapChainD->colorFormat);
1961 }
1962
1963 barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_PRESENT);
1964 barrierGen.enqueueBufferedTransitionBarriers(cbD);
1965
1966 if (timestampQueryHeap.isValid()) {
1967 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
1968 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
1969 D3D12_QUERY_TYPE_TIMESTAMP,
1970 timestampPairStartIndex + 1);
1971 cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
1972 D3D12_QUERY_TYPE_TIMESTAMP,
1973 timestampPairStartIndex,
1974 2,
1975 timestampReadbackArea.mem.buffer,
1976 timestampPairStartIndex * sizeof(quint64));
1977 }
1978
1979 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
1980 HRESULT hr = cmdList->Close();
1981 if (FAILED(hr)) {
1982 qWarning("Failed to close command list: %s",
1983 qPrintable(QSystemError::windowsComString(hr)));
1984 return QRhi::FrameOpError;
1985 }
1986
1987 ID3D12CommandList *execList[] = { cmdList };
1988 cmdQueue->ExecuteCommandLists(1, execList);
1989
1990 if (!flags.testFlag(QRhi::SkipPresent)) {
1991 UINT presentFlags = 0;
1992 if (swapChainD->swapInterval == 0
1993 && (swapChainD->swapChainFlags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING))
1994 {
1995 presentFlags |= DXGI_PRESENT_ALLOW_TEARING;
1996 }
1997 if (!swapChainD->swapChain) {
1998 qWarning("Failed to present, no swapchain");
1999 return QRhi::FrameOpError;
2000 }
2001 HRESULT hr = swapChainD->swapChain->Present(swapChainD->swapInterval, presentFlags);
2002 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
2003 qWarning("Device loss detected in Present()");
2004 deviceLost = true;
2005 return QRhi::FrameOpDeviceLost;
2006 } else if (FAILED(hr)) {
2007 qWarning("Failed to present: %s", qPrintable(QSystemError::windowsComString(hr)));
2008 return QRhi::FrameOpError;
2009 }
2010
2011 if (dcompDevice && swapChainD->dcompTarget && swapChainD->dcompVisual)
2012 dcompDevice->Commit();
2013 }
2014
2015 swapChainD->addCommandCompletionSignalForCurrentFrameSlot();
2016
2017 // NB! The deferred-release mechanism here differs from the older QRhi
2018 // backends. There is no lastActiveFrameSlot tracking. Instead,
2019 // currentFrameSlot is written to the registered entries now, and so the
2020 // resources will get released in the frames_in_flight'th beginFrame()
2021 // counting starting from now.
2022 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2023
2024 if (!flags.testFlag(QRhi::SkipPresent)) {
2025 // Only move to the next slot if we presented. Otherwise will block and
2026 // wait for completion in the next beginFrame already, but SkipPresent
2027 // should be infrequent anyway.
2028 swapChainD->currentFrameSlot = (swapChainD->currentFrameSlot + 1) % QD3D12_FRAMES_IN_FLIGHT;
2029 swapChainD->currentBackBufferIndex = swapChainD->swapChain->GetCurrentBackBufferIndex();
2030 }
2031
2032 currentSwapChain = nullptr;
2033 return QRhi::FrameOpSuccess;
2034}
2035
2036QRhi::FrameOpResult QRhiD3D12::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi::BeginFrameFlags flags)
2037{
2038 Q_UNUSED(flags);
2039
2040 // Switch to the next slot manually. Swapchains do not know about this
2041 // which is good. So for example an onscreen, onscreen, offscreen,
2042 // onscreen, onscreen, onscreen sequence of frames leads to 0, 1, 0, 0, 1,
2043 // 0. (no strict alternation anymore) But this is not different from what
2044 // happens when multiple swapchains are involved. Offscreen frames are
2045 // synchronous anyway in the sense that they wait for execution to complete
2046 // in endOffscreenFrame, so no resources used in that frame are busy
2047 // anymore in the next frame.
2048
2049 currentFrameSlot = (currentFrameSlot + 1) % QD3D12_FRAMES_IN_FLIGHT;
2050
2051 for (QD3D12SwapChain *sc : std::as_const(swapchains))
2052 sc->waitCommandCompletionForFrameSlot(currentFrameSlot); // note: not sc's currentFrameSlot
2053
2054 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
2055 if (FAILED(hr)) {
2056 qWarning("Failed to reset command allocator: %s",
2057 qPrintable(QSystemError::windowsComString(hr)));
2058 return QRhi::FrameOpError;
2059 }
2060
2061 if (!offscreenCb[currentFrameSlot])
2062 offscreenCb[currentFrameSlot] = new QD3D12CommandBuffer(this);
2063 QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot];
2064 if (!startCommandListForCurrentFrameSlot(&cbD->cmdList))
2065 return QRhi::FrameOpError;
2066
2067 releaseQueue.executeDeferredReleases(currentFrameSlot);
2068 cbD->resetState();
2069 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
2070 smallStagingAreas[currentFrameSlot].head = 0;
2071
2072 bindShaderVisibleHeaps(cbD);
2073
2074 if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
2075 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
2076 D3D12_QUERY_TYPE_TIMESTAMP,
2077 currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT);
2078 }
2079
2080 offscreenActive = true;
2081 *cb = cbD;
2082
2083 return QRhi::FrameOpSuccess;
2084}
2085
2086QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags)
2087{
2088 Q_UNUSED(flags);
2089 Q_ASSERT(offscreenActive);
2090 offscreenActive = false;
2091
2092 QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot];
2093 if (timestampQueryHeap.isValid()) {
2094 const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
2095 cbD->cmdList->EndQuery(timestampQueryHeap.heap,
2096 D3D12_QUERY_TYPE_TIMESTAMP,
2097 timestampPairStartIndex + 1);
2098 cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
2099 D3D12_QUERY_TYPE_TIMESTAMP,
2100 timestampPairStartIndex,
2101 2,
2102 timestampReadbackArea.mem.buffer,
2103 timestampPairStartIndex * sizeof(quint64));
2104 }
2105
2106 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
2107 HRESULT hr = cmdList->Close();
2108 if (FAILED(hr)) {
2109 qWarning("Failed to close command list: %s",
2110 qPrintable(QSystemError::windowsComString(hr)));
2111 return QRhi::FrameOpError;
2112 }
2113
2114 ID3D12CommandList *execList[] = { cmdList };
2115 cmdQueue->ExecuteCommandLists(1, execList);
2116
2117 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2118
2119 // wait for completion
2120 waitGpu();
2121
2122 // Here we know that executing the host-side reads for this (or any
2123 // previous) frame is safe since we waited for completion above.
2124 finishActiveReadbacks(true);
2125
2126 // the timestamp query results should be available too, given the wait
2127 if (timestampQueryHeap.isValid()) {
2128 calculateGpuTime(cbD,
2129 currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT,
2130 timestampReadbackArea.mem.p,
2131 timestampTicksPerSecond);
2132 }
2133
2134 return QRhi::FrameOpSuccess;
2135}
2136
2137QRhi::FrameOpResult QRhiD3D12::finish()
2138{
2139 QD3D12CommandBuffer *cbD = nullptr;
2140 if (inFrame) {
2141 if (offscreenActive) {
2142 Q_ASSERT(!currentSwapChain);
2143 cbD = offscreenCb[currentFrameSlot];
2144 } else {
2145 Q_ASSERT(currentSwapChain);
2146 cbD = &currentSwapChain->cbWrapper;
2147 }
2148 if (!cbD)
2149 return QRhi::FrameOpError;
2150
2151 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2152
2153 D3D12GraphicsCommandList *cmdList = cbD->cmdList;
2154 HRESULT hr = cmdList->Close();
2155 if (FAILED(hr)) {
2156 qWarning("Failed to close command list: %s",
2157 qPrintable(QSystemError::windowsComString(hr)));
2158 return QRhi::FrameOpError;
2159 }
2160
2161 ID3D12CommandList *execList[] = { cmdList };
2162 cmdQueue->ExecuteCommandLists(1, execList);
2163
2164 releaseQueue.activatePendingDeferredReleaseRequests(currentFrameSlot);
2165 }
2166
2167 // full blocking wait for everything, frame slots do not matter now
2168 waitGpu();
2169
2170 if (inFrame) {
2171 HRESULT hr = cmdAllocators[currentFrameSlot]->Reset();
2172 if (FAILED(hr)) {
2173 qWarning("Failed to reset command allocator: %s",
2174 qPrintable(QSystemError::windowsComString(hr)));
2175 return QRhi::FrameOpError;
2176 }
2177
2178 if (!startCommandListForCurrentFrameSlot(&cbD->cmdList))
2179 return QRhi::FrameOpError;
2180
2181 cbD->resetState();
2182
2183 shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[currentFrameSlot].head = 0;
2184 smallStagingAreas[currentFrameSlot].head = 0;
2185
2186 bindShaderVisibleHeaps(cbD);
2187 }
2188
2189 releaseQueue.releaseAll();
2190 finishActiveReadbacks(true);
2191
2192 return QRhi::FrameOpSuccess;
2193}
2194
2195void QRhiD3D12::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2196{
2197 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2198 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2199 enqueueResourceUpdates(cbD, resourceUpdates);
2200}
2201
2202void QRhiD3D12::beginPass(QRhiCommandBuffer *cb,
2203 QRhiRenderTarget *rt,
2204 const QColor &colorClearValue,
2205 const QRhiDepthStencilClearValue &depthStencilClearValue,
2206 QRhiResourceUpdateBatch *resourceUpdates,
2207 QRhiCommandBuffer::BeginPassFlags)
2208{
2209 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2210 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2211
2212 if (resourceUpdates)
2213 enqueueResourceUpdates(cbD, resourceUpdates);
2214
2215 QD3D12RenderTargetData *rtD = rtData(rt);
2216 bool wantsColorClear = true;
2217 bool wantsDsClear = true;
2218 if (rt->resourceType() == QRhiRenderTarget::TextureRenderTarget) {
2219 QD3D12TextureRenderTarget *rtTex = QRHI_RES(QD3D12TextureRenderTarget, rt);
2220 wantsColorClear = !rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents);
2221 wantsDsClear = !rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents);
2222 if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QD3D12Texture, QD3D12RenderBuffer>(rtTex->description(), rtD->currentResIdList))
2223 rtTex->create();
2224
2225 for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); it != itEnd; ++it) {
2226 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, it->texture());
2227 QD3D12Texture *resolveTexD = QRHI_RES(QD3D12Texture, it->resolveTexture());
2228 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, it->renderBuffer());
2229 if (texD)
2230 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2231 else if (rbD)
2232 barrierGen.addTransitionBarrier(rbD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2233 if (resolveTexD)
2234 barrierGen.addTransitionBarrier(resolveTexD->handle, D3D12_RESOURCE_STATE_RENDER_TARGET);
2235 }
2236 if (rtTex->m_desc.depthStencilBuffer()) {
2237 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, rtTex->m_desc.depthStencilBuffer());
2238 Q_ASSERT(rbD->m_type == QRhiRenderBuffer::DepthStencil);
2239 barrierGen.addTransitionBarrier(rbD->handle, D3D12_RESOURCE_STATE_DEPTH_WRITE);
2240 } else if (rtTex->m_desc.depthTexture()) {
2241 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, rtTex->m_desc.depthTexture());
2242 barrierGen.addTransitionBarrier(depthTexD->handle, D3D12_RESOURCE_STATE_DEPTH_WRITE);
2243 }
2244 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2245 } else {
2246 Q_ASSERT(currentSwapChain);
2247 barrierGen.addTransitionBarrier(currentSwapChain->sampleDesc.Count > 1
2248 ? currentSwapChain->msaaBuffers[currentSwapChain->currentBackBufferIndex]
2249 : currentSwapChain->colorBuffers[currentSwapChain->currentBackBufferIndex],
2250 D3D12_RESOURCE_STATE_RENDER_TARGET);
2251 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2252 }
2253
2254 cbD->cmdList->OMSetRenderTargets(UINT(rtD->colorAttCount),
2255 rtD->rtv,
2256 TRUE,
2257 rtD->dsAttCount ? &rtD->dsv : nullptr);
2258
2259 if (rtD->colorAttCount && wantsColorClear) {
2260 float clearColor[4] = {
2261 colorClearValue.redF(),
2262 colorClearValue.greenF(),
2263 colorClearValue.blueF(),
2264 colorClearValue.alphaF()
2265 };
2266 for (int i = 0; i < rtD->colorAttCount; ++i)
2267 cbD->cmdList->ClearRenderTargetView(rtD->rtv[i], clearColor, 0, nullptr);
2268 }
2269 if (rtD->dsAttCount && wantsDsClear) {
2270 cbD->cmdList->ClearDepthStencilView(rtD->dsv,
2271 D3D12_CLEAR_FLAGS(D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL),
2272 depthStencilClearValue.depthClearValue(),
2273 UINT8(depthStencilClearValue.stencilClearValue()),
2274 0,
2275 nullptr);
2276 }
2277
2278 cbD->recordingPass = QD3D12CommandBuffer::RenderPass;
2279 cbD->currentTarget = rt;
2280
2281 bool hasShadingRateMapSet = false;
2282#ifdef QRHI_D3D12_CL5_AVAILABLE
2283 if (rtD->rp->hasShadingRateMap) {
2284 cbD->setShadingRate(QSize(1, 1));
2285 QD3D12ShadingRateMap *rateMapD = rt->resourceType() == QRhiRenderTarget::TextureRenderTarget
2286 ? QRHI_RES(QD3D12ShadingRateMap, QRHI_RES(QD3D12TextureRenderTarget, rt)->m_desc.shadingRateMap())
2287 : QRHI_RES(QD3D12ShadingRateMap, QRHI_RES(QD3D12SwapChainRenderTarget, rt)->swapChain()->shadingRateMap());
2288 if (QD3D12Resource *res = resourcePool.lookupRef(rateMapD->handle)) {
2289 barrierGen.addTransitionBarrier(rateMapD->handle, D3D12_RESOURCE_STATE_SHADING_RATE_SOURCE);
2290 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2291 cbD->cmdList->RSSetShadingRateImage(res->resource);
2292 hasShadingRateMapSet = true;
2293 }
2294 } else if (cbD->hasShadingRateMapSet) {
2295 cbD->cmdList->RSSetShadingRateImage(nullptr);
2296 cbD->setShadingRate(QSize(1, 1));
2297 } else if (cbD->hasShadingRateSet) {
2298 cbD->setShadingRate(QSize(1, 1));
2299 }
2300#endif
2301
2302 cbD->resetPerPassState();
2303
2304 // shading rate tracking is reset in resetPerPassState(), sync what we did just above
2305 cbD->hasShadingRateMapSet = hasShadingRateMapSet;
2306}
2307
2308void QRhiD3D12::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2309{
2310 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2311 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::RenderPass);
2312
2313 if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) {
2314 QD3D12TextureRenderTarget *rtTex = QRHI_RES(QD3D12TextureRenderTarget, cbD->currentTarget);
2315 for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments();
2316 it != itEnd; ++it)
2317 {
2318 const QRhiColorAttachment &colorAtt(*it);
2319 if (!colorAtt.resolveTexture())
2320 continue;
2321
2322 QD3D12Texture *dstTexD = QRHI_RES(QD3D12Texture, colorAtt.resolveTexture());
2323 QD3D12Resource *dstRes = resourcePool.lookupRef(dstTexD->handle);
2324 if (!dstRes)
2325 continue;
2326
2327 QD3D12Texture *srcTexD = QRHI_RES(QD3D12Texture, colorAtt.texture());
2328 QD3D12RenderBuffer *srcRbD = QRHI_RES(QD3D12RenderBuffer, colorAtt.renderBuffer());
2329 Q_ASSERT(srcTexD || srcRbD);
2330 QD3D12Resource *srcRes = resourcePool.lookupRef(srcTexD ? srcTexD->handle : srcRbD->handle);
2331 if (!srcRes)
2332 continue;
2333
2334 if (srcTexD) {
2335 if (srcTexD->dxgiFormat != dstTexD->dxgiFormat) {
2336 qWarning("Resolve source (%d) and destination (%d) formats do not match",
2337 int(srcTexD->dxgiFormat), int(dstTexD->dxgiFormat));
2338 continue;
2339 }
2340 if (srcTexD->sampleDesc.Count <= 1) {
2341 qWarning("Cannot resolve a non-multisample texture");
2342 continue;
2343 }
2344 if (srcTexD->m_pixelSize != dstTexD->m_pixelSize) {
2345 qWarning("Resolve source and destination sizes do not match");
2346 continue;
2347 }
2348 } else {
2349 if (srcRbD->dxgiFormat != dstTexD->dxgiFormat) {
2350 qWarning("Resolve source (%d) and destination (%d) formats do not match",
2351 int(srcRbD->dxgiFormat), int(dstTexD->dxgiFormat));
2352 continue;
2353 }
2354 if (srcRbD->m_pixelSize != dstTexD->m_pixelSize) {
2355 qWarning("Resolve source and destination sizes do not match");
2356 continue;
2357 }
2358 }
2359
2360 barrierGen.addTransitionBarrier(srcTexD ? srcTexD->handle : srcRbD->handle, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
2361 barrierGen.addTransitionBarrier(dstTexD->handle, D3D12_RESOURCE_STATE_RESOLVE_DEST);
2362 barrierGen.enqueueBufferedTransitionBarriers(cbD);
2363
2364 const UINT resolveCount = colorAtt.multiViewCount() >= 2 ? colorAtt.multiViewCount() : 1;
2365 for (UINT resolveIdx = 0; resolveIdx < resolveCount; ++resolveIdx) {
2366 const UINT srcSubresource = calcSubresource(0, UINT(colorAtt.layer()) + resolveIdx, 1);
2367 const UINT dstSubresource = calcSubresource(UINT(colorAtt.resolveLevel()),
2368 UINT(colorAtt.resolveLayer()) + resolveIdx,
2369 dstTexD->mipLevelCount);
2370 cbD->cmdList->ResolveSubresource(dstRes->resource, dstSubresource,
2371 srcRes->resource, srcSubresource,
2372 dstTexD->dxgiFormat);
2373 }
2374 }
2375 if (rtTex->m_desc.depthResolveTexture())
2376 qWarning("Resolving multisample depth-stencil buffers is not supported with D3D");
2377 }
2378
2379 cbD->recordingPass = QD3D12CommandBuffer::NoPass;
2380 cbD->currentTarget = nullptr;
2381
2382 if (resourceUpdates)
2383 enqueueResourceUpdates(cbD, resourceUpdates);
2384}
2385
2386void QRhiD3D12::beginComputePass(QRhiCommandBuffer *cb,
2387 QRhiResourceUpdateBatch *resourceUpdates,
2388 QRhiCommandBuffer::BeginPassFlags)
2389{
2390 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2391 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass);
2392
2393 if (resourceUpdates)
2394 enqueueResourceUpdates(cbD, resourceUpdates);
2395
2396 cbD->recordingPass = QD3D12CommandBuffer::ComputePass;
2397
2398 cbD->resetPerPassState();
2399}
2400
2401void QRhiD3D12::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
2402{
2403 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2404 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2405
2406 cbD->recordingPass = QD3D12CommandBuffer::NoPass;
2407
2408 if (resourceUpdates)
2409 enqueueResourceUpdates(cbD, resourceUpdates);
2410}
2411
2412void QRhiD3D12::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps)
2413{
2414 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2415 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2416 QD3D12ComputePipeline *psD = QRHI_RES(QD3D12ComputePipeline, ps);
2417 const bool pipelineChanged = cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation;
2418
2419 if (pipelineChanged) {
2420 cbD->currentGraphicsPipeline = nullptr;
2421 cbD->currentComputePipeline = psD;
2422 cbD->currentPipelineGeneration = psD->generation;
2423
2424 if (QD3D12Pipeline *pipeline = pipelinePool.lookupRef(psD->handle)) {
2425 Q_ASSERT(pipeline->type == QD3D12Pipeline::Compute);
2426 cbD->cmdList->SetPipelineState(pipeline->pso);
2427 if (QD3D12RootSignature *rs = rootSignaturePool.lookupRef(psD->rootSigHandle))
2428 cbD->cmdList->SetComputeRootSignature(rs->rootSig);
2429 }
2430 }
2431}
2432
2433void QRhiD3D12::dispatch(QRhiCommandBuffer *cb, int x, int y, int z)
2434{
2435 QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
2436 Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::ComputePass);
2437 cbD->cmdList->Dispatch(UINT(x), UINT(y), UINT(z));
2438}
2439
2440bool QD3D12DescriptorHeap::create(ID3D12Device *device,
2441 quint32 descriptorCount,
2442 D3D12_DESCRIPTOR_HEAP_TYPE heapType,
2443 D3D12_DESCRIPTOR_HEAP_FLAGS heapFlags)
2444{
2445 head = 0;
2446 capacity = descriptorCount;
2447 this->heapType = heapType;
2448 this->heapFlags = heapFlags;
2449
2450 D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {};
2451 heapDesc.Type = heapType;
2452 heapDesc.NumDescriptors = capacity;
2453 heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAGS(heapFlags);
2454
2455 HRESULT hr = device->CreateDescriptorHeap(&heapDesc, __uuidof(ID3D12DescriptorHeap), reinterpret_cast<void **>(&heap));
2456 if (FAILED(hr)) {
2457 qWarning("Failed to create descriptor heap: %s", qPrintable(QSystemError::windowsComString(hr)));
2458 heap = nullptr;
2459 capacity = descriptorByteSize = 0;
2460 return false;
2461 }
2462
2463 descriptorByteSize = device->GetDescriptorHandleIncrementSize(heapType);
2464 heapStart.cpuHandle = heap->GetCPUDescriptorHandleForHeapStart();
2465 if (heapFlags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
2466 heapStart.gpuHandle = heap->GetGPUDescriptorHandleForHeapStart();
2467
2468 return true;
2469}
2470
2471void QD3D12DescriptorHeap::createWithExisting(const QD3D12DescriptorHeap &other,
2472 quint32 offsetInDescriptors,
2473 quint32 descriptorCount)
2474{
2475 heap = nullptr;
2476 head = 0;
2477 capacity = descriptorCount;
2478 heapType = other.heapType;
2479 heapFlags = other.heapFlags;
2480 descriptorByteSize = other.descriptorByteSize;
2481 heapStart = incremented(other.heapStart, offsetInDescriptors);
2482}
2483
2484void QD3D12DescriptorHeap::destroy()
2485{
2486 if (heap) {
2487 heap->Release();
2488 heap = nullptr;
2489 }
2490 capacity = 0;
2491}
2492
2493void QD3D12DescriptorHeap::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2494{
2495 if (heap) {
2496 releaseQueue->deferredReleaseDescriptorHeap(heap);
2497 heap = nullptr;
2498 }
2499 capacity = 0;
2500}
2501
2502QD3D12Descriptor QD3D12DescriptorHeap::get(quint32 count)
2503{
2504 Q_ASSERT(count > 0);
2505 if (head + count > capacity) {
2506 qWarning("Cannot get %u descriptors as that would exceed capacity %u", count, capacity);
2507 return {};
2508 }
2509 head += count;
2510 return at(head - count);
2511}
2512
2513QD3D12Descriptor QD3D12DescriptorHeap::at(quint32 index) const
2514{
2515 const quint32 startOffset = index * descriptorByteSize;
2516 QD3D12Descriptor result;
2517 result.cpuHandle.ptr = heapStart.cpuHandle.ptr + startOffset;
2518 if (heapStart.gpuHandle.ptr != 0)
2519 result.gpuHandle.ptr = heapStart.gpuHandle.ptr + startOffset;
2520 return result;
2521}
2522
2523bool QD3D12CpuDescriptorPool::create(ID3D12Device *device, D3D12_DESCRIPTOR_HEAP_TYPE heapType, const char *debugName)
2524{
2525 QD3D12DescriptorHeap firstHeap;
2526 if (!firstHeap.create(device, DESCRIPTORS_PER_HEAP, heapType, D3D12_DESCRIPTOR_HEAP_FLAG_NONE))
2527 return false;
2528 heaps.append(HeapWithMap::init(firstHeap, DESCRIPTORS_PER_HEAP));
2529 descriptorByteSize = heaps[0].heap.descriptorByteSize;
2530 this->device = device;
2531 this->debugName = debugName;
2532 return true;
2533}
2534
2535void QD3D12CpuDescriptorPool::destroy()
2536{
2537#ifndef QT_NO_DEBUG
2538 // debug builds: just do it always
2539 static bool leakCheck = true;
2540#else
2541 // release builds: opt-in
2542 static bool leakCheck = qEnvironmentVariableIntValue("QT_RHI_LEAK_CHECK");
2543#endif
2544 if (leakCheck) {
2545 for (HeapWithMap &heap : heaps) {
2546 const int leakedDescriptorCount = heap.map.count(true);
2547 if (leakedDescriptorCount > 0) {
2548 qWarning("QD3D12CpuDescriptorPool::destroy(): "
2549 "Heap %p for descriptor pool %p '%s' has %d unreleased descriptors",
2550 &heap.heap, this, debugName, leakedDescriptorCount);
2551 }
2552 }
2553 }
2554 for (HeapWithMap &heap : heaps)
2555 heap.heap.destroy();
2556 heaps.clear();
2557}
2558
2559QD3D12Descriptor QD3D12CpuDescriptorPool::allocate(quint32 count)
2560{
2561 Q_ASSERT(count > 0 && count <= DESCRIPTORS_PER_HEAP);
2562
2563 HeapWithMap &last(heaps.last());
2564 if (last.heap.head + count <= last.heap.capacity) {
2565 quint32 firstIndex = last.heap.head;
2566 for (quint32 i = 0; i < count; ++i)
2567 last.map.setBit(firstIndex + i);
2568 return last.heap.get(count);
2569 }
2570
2571 for (HeapWithMap &heap : heaps) {
2572 quint32 freeCount = 0;
2573 for (quint32 i = 0; i < DESCRIPTORS_PER_HEAP; ++i) {
2574 if (heap.map.testBit(i)) {
2575 freeCount = 0;
2576 } else {
2577 freeCount += 1;
2578 if (freeCount == count) {
2579 quint32 firstIndex = i - (freeCount - 1);
2580 for (quint32 j = 0; j < count; ++j) {
2581 heap.map.setBit(firstIndex + j);
2582 return heap.heap.at(firstIndex);
2583 }
2584 }
2585 }
2586 }
2587 }
2588
2589 QD3D12DescriptorHeap newHeap;
2590 if (!newHeap.create(device, DESCRIPTORS_PER_HEAP, last.heap.heapType, last.heap.heapFlags))
2591 return {};
2592
2593 heaps.append(HeapWithMap::init(newHeap, DESCRIPTORS_PER_HEAP));
2594
2595 for (quint32 i = 0; i < count; ++i)
2596 heaps.last().map.setBit(i);
2597
2598 return heaps.last().heap.get(count);
2599}
2600
2601void QD3D12CpuDescriptorPool::release(const QD3D12Descriptor &descriptor, quint32 count)
2602{
2603 Q_ASSERT(count > 0 && count <= DESCRIPTORS_PER_HEAP);
2604 if (!descriptor.isValid())
2605 return;
2606
2607 const SIZE_T addr = descriptor.cpuHandle.ptr;
2608 for (HeapWithMap &heap : heaps) {
2609 const SIZE_T begin = heap.heap.heapStart.cpuHandle.ptr;
2610 const SIZE_T end = begin + heap.heap.descriptorByteSize * heap.heap.capacity;
2611 if (addr >= begin && addr < end) {
2612 quint32 firstIndex = (addr - begin) / heap.heap.descriptorByteSize;
2613 for (quint32 i = 0; i < count; ++i)
2614 heap.map.setBit(firstIndex + i, false);
2615 return;
2616 }
2617 }
2618
2619 qWarning("QD3D12CpuDescriptorPool::release: Descriptor with address %llu is not in any heap",
2620 quint64(descriptor.cpuHandle.ptr));
2621}
2622
2623bool QD3D12QueryHeap::create(ID3D12Device *device,
2624 quint32 queryCount,
2625 D3D12_QUERY_HEAP_TYPE heapType)
2626{
2627 capacity = queryCount;
2628
2629 D3D12_QUERY_HEAP_DESC heapDesc = {};
2630 heapDesc.Type = heapType;
2631 heapDesc.Count = capacity;
2632
2633 HRESULT hr = device->CreateQueryHeap(&heapDesc, __uuidof(ID3D12QueryHeap), reinterpret_cast<void **>(&heap));
2634 if (FAILED(hr)) {
2635 qWarning("Failed to create query heap: %s", qPrintable(QSystemError::windowsComString(hr)));
2636 heap = nullptr;
2637 capacity = 0;
2638 return false;
2639 }
2640
2641 return true;
2642}
2643
2644void QD3D12QueryHeap::destroy()
2645{
2646 if (heap) {
2647 heap->Release();
2648 heap = nullptr;
2649 }
2650 capacity = 0;
2651}
2652
2653bool QD3D12StagingArea::create(QRhiD3D12 *rhi, quint32 capacity, D3D12_HEAP_TYPE heapType)
2654{
2655 Q_ASSERT(heapType == D3D12_HEAP_TYPE_UPLOAD || heapType == D3D12_HEAP_TYPE_READBACK);
2656 D3D12_RESOURCE_DESC resourceDesc = {};
2657 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
2658 resourceDesc.Width = capacity;
2659 resourceDesc.Height = 1;
2660 resourceDesc.DepthOrArraySize = 1;
2661 resourceDesc.MipLevels = 1;
2662 resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
2663 resourceDesc.SampleDesc = { 1, 0 };
2664 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
2665 resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
2666 UINT state = heapType == D3D12_HEAP_TYPE_UPLOAD ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
2667 HRESULT hr = rhi->vma.createResource(heapType,
2668 &resourceDesc,
2669 D3D12_RESOURCE_STATES(state),
2670 nullptr,
2671 &allocation,
2672 __uuidof(ID3D12Resource),
2673 reinterpret_cast<void **>(&resource));
2674 if (FAILED(hr)) {
2675 qWarning("Failed to create buffer for staging area: %s",
2676 qPrintable(QSystemError::windowsComString(hr)));
2677 return false;
2678 }
2679 void *p = nullptr;
2680 hr = resource->Map(0, nullptr, &p);
2681 if (FAILED(hr)) {
2682 qWarning("Failed to map buffer for staging area: %s",
2683 qPrintable(QSystemError::windowsComString(hr)));
2684 destroy();
2685 return false;
2686 }
2687
2688 mem.p = static_cast<quint8 *>(p);
2689 mem.gpuAddr = resource->GetGPUVirtualAddress();
2690 mem.buffer = resource;
2691 mem.bufferOffset = 0;
2692
2693 this->capacity = capacity;
2694 head = 0;
2695
2696 return true;
2697}
2698
2699void QD3D12StagingArea::destroy()
2700{
2701 if (resource) {
2702 resource->Release();
2703 resource = nullptr;
2704 }
2705 if (allocation) {
2706 allocation->Release();
2707 allocation = nullptr;
2708 }
2709 mem = {};
2710}
2711
2712void QD3D12StagingArea::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2713{
2714 if (resource)
2715 releaseQueue->deferredReleaseResourceAndAllocation(resource, allocation);
2716 mem = {};
2717}
2718
2719QD3D12StagingArea::Allocation QD3D12StagingArea::get(quint32 byteSize)
2720{
2721 const quint32 allocSize = aligned(byteSize, ALIGNMENT);
2722 if (head + allocSize > capacity) {
2723 qWarning("Failed to allocate %u (%u) bytes from staging area of size %u with %u bytes left",
2724 allocSize, byteSize, capacity, remainingCapacity());
2725 return {};
2726 }
2727 const quint32 offset = head;
2728 head += allocSize;
2729 return {
2730 mem.p + offset,
2731 mem.gpuAddr + offset,
2732 mem.buffer,
2733 offset
2734 };
2735}
2736
2737// Can be called inside and outside of begin-endFrame. Removes from the pool
2738// and releases the underlying native resource only in the frames_in_flight'th
2739// beginFrame() counted starting from the next endFrame().
2740void QD3D12ReleaseQueue::deferredReleaseResource(const QD3D12ObjectHandle &handle)
2741{
2742 DeferredReleaseEntry e;
2743 e.handle = handle;
2744 queue.append(e);
2745}
2746
2747void QD3D12ReleaseQueue::deferredReleaseResourceWithViews(const QD3D12ObjectHandle &handle,
2748 QD3D12CpuDescriptorPool *pool,
2749 const QD3D12Descriptor &viewsStart,
2750 int viewCount)
2751{
2752 DeferredReleaseEntry e;
2753 e.type = DeferredReleaseEntry::Resource;
2754 e.handle = handle;
2755 e.poolForViews = pool;
2756 e.viewsStart = viewsStart;
2757 e.viewCount = viewCount;
2758 queue.append(e);
2759}
2760
2761void QD3D12ReleaseQueue::deferredReleasePipeline(const QD3D12ObjectHandle &handle)
2762{
2763 DeferredReleaseEntry e;
2764 e.type = DeferredReleaseEntry::Pipeline;
2765 e.handle = handle;
2766 queue.append(e);
2767}
2768
2769void QD3D12ReleaseQueue::deferredReleaseRootSignature(const QD3D12ObjectHandle &handle)
2770{
2771 DeferredReleaseEntry e;
2772 e.type = DeferredReleaseEntry::RootSignature;
2773 e.handle = handle;
2774 queue.append(e);
2775}
2776
2777void QD3D12ReleaseQueue::deferredReleaseCallback(std::function<void(void*)> callback, void *userData)
2778{
2779 DeferredReleaseEntry e;
2780 e.type = DeferredReleaseEntry::Callback;
2781 e.callback = callback;
2782 e.callbackUserData = userData;
2783 queue.append(e);
2784}
2785
2786void QD3D12ReleaseQueue::deferredReleaseResourceAndAllocation(ID3D12Resource *resource,
2787 D3D12MA::Allocation *allocation)
2788{
2789 DeferredReleaseEntry e;
2790 e.type = DeferredReleaseEntry::ResourceAndAllocation;
2791 e.resourceAndAllocation = { resource, allocation };
2792 queue.append(e);
2793}
2794
2795void QD3D12ReleaseQueue::deferredReleaseDescriptorHeap(ID3D12DescriptorHeap *heap)
2796{
2797 DeferredReleaseEntry e;
2798 e.type = DeferredReleaseEntry::DescriptorHeap;
2799 e.descriptorHeap = heap;
2800 queue.append(e);
2801}
2802
2803void QD3D12ReleaseQueue::deferredReleaseViews(QD3D12CpuDescriptorPool *pool,
2804 const QD3D12Descriptor &viewsStart,
2805 int viewCount)
2806{
2807 DeferredReleaseEntry e;
2808 e.type = DeferredReleaseEntry::Views;
2809 e.poolForViews = pool;
2810 e.viewsStart = viewsStart;
2811 e.viewCount = viewCount;
2812 queue.append(e);
2813}
2814
2815void QD3D12ReleaseQueue::activatePendingDeferredReleaseRequests(int frameSlot)
2816{
2817 for (DeferredReleaseEntry &e : queue) {
2818 if (!e.frameSlotToBeReleasedIn.has_value())
2819 e.frameSlotToBeReleasedIn = frameSlot;
2820 }
2821}
2822
2823void QD3D12ReleaseQueue::executeDeferredReleases(int frameSlot, bool forced)
2824{
2825 for (int i = queue.count() - 1; i >= 0; --i) {
2826 const DeferredReleaseEntry &e(queue[i]);
2827 if (forced || (e.frameSlotToBeReleasedIn.has_value() && e.frameSlotToBeReleasedIn.value() == frameSlot)) {
2828 switch (e.type) {
2829 case DeferredReleaseEntry::Resource:
2830 resourcePool->remove(e.handle);
2831 if (e.poolForViews && e.viewsStart.isValid() && e.viewCount > 0)
2832 e.poolForViews->release(e.viewsStart, e.viewCount);
2833 break;
2834 case DeferredReleaseEntry::Pipeline:
2835 pipelinePool->remove(e.handle);
2836 break;
2837 case DeferredReleaseEntry::RootSignature:
2838 rootSignaturePool->remove(e.handle);
2839 break;
2840 case DeferredReleaseEntry::Callback:
2841 e.callback(e.callbackUserData);
2842 break;
2843 case DeferredReleaseEntry::ResourceAndAllocation:
2844 // order matters: resource first, then the allocation (which
2845 // may be null)
2846 e.resourceAndAllocation.first->Release();
2847 if (e.resourceAndAllocation.second)
2848 e.resourceAndAllocation.second->Release();
2849 break;
2850 case DeferredReleaseEntry::DescriptorHeap:
2851 e.descriptorHeap->Release();
2852 break;
2853 case DeferredReleaseEntry::Views:
2854 e.poolForViews->release(e.viewsStart, e.viewCount);
2855 break;
2856 }
2857 queue.removeAt(i);
2858 }
2859 }
2860}
2861
2862void QD3D12ReleaseQueue::releaseAll()
2863{
2864 executeDeferredReleases(0, true);
2865}
2866
2867void QD3D12ResourceBarrierGenerator::addTransitionBarrier(const QD3D12ObjectHandle &resourceHandle,
2868 D3D12_RESOURCE_STATES stateAfter)
2869{
2870 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2871 if (stateAfter != res->state) {
2872 transitionResourceBarriers.append({ resourceHandle, res->state, stateAfter });
2873 res->state = stateAfter;
2874 }
2875 }
2876}
2877
2878void QD3D12ResourceBarrierGenerator::enqueueBufferedTransitionBarriers(QD3D12CommandBuffer *cbD)
2879{
2880 QVarLengthArray<D3D12_RESOURCE_BARRIER, PREALLOC> barriers;
2881 for (const TransitionResourceBarrier &trb : transitionResourceBarriers) {
2882 if (QD3D12Resource *res = resourcePool->lookupRef(trb.resourceHandle)) {
2883 D3D12_RESOURCE_BARRIER barrier = {};
2884 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
2885 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2886 barrier.Transition.pResource = res->resource;
2887 barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
2888 barrier.Transition.StateBefore = trb.stateBefore;
2889 barrier.Transition.StateAfter = trb.stateAfter;
2890 barriers.append(barrier);
2891 }
2892 }
2893 transitionResourceBarriers.clear();
2894 if (!barriers.isEmpty())
2895 cbD->cmdList->ResourceBarrier(barriers.count(), barriers.constData());
2896}
2897
2898void QD3D12ResourceBarrierGenerator::enqueueSubresourceTransitionBarrier(QD3D12CommandBuffer *cbD,
2899 const QD3D12ObjectHandle &resourceHandle,
2900 UINT subresource,
2901 D3D12_RESOURCE_STATES stateBefore,
2902 D3D12_RESOURCE_STATES stateAfter)
2903{
2904 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2905 D3D12_RESOURCE_BARRIER barrier = {};
2906 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
2907 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2908 barrier.Transition.pResource = res->resource;
2909 barrier.Transition.Subresource = subresource;
2910 barrier.Transition.StateBefore = stateBefore;
2911 barrier.Transition.StateAfter = stateAfter;
2912 cbD->cmdList->ResourceBarrier(1, &barrier);
2913 }
2914}
2915
2916void QD3D12ResourceBarrierGenerator::enqueueUavBarrier(QD3D12CommandBuffer *cbD,
2917 const QD3D12ObjectHandle &resourceHandle)
2918{
2919 if (QD3D12Resource *res = resourcePool->lookupRef(resourceHandle)) {
2920 D3D12_RESOURCE_BARRIER barrier = {};
2921 barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
2922 barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
2923 barrier.UAV.pResource = res->resource;
2924 cbD->cmdList->ResourceBarrier(1, &barrier);
2925 }
2926}
2927
2928void QD3D12ShaderBytecodeCache::insertWithCapacityLimit(const QRhiShaderStage &key, const Shader &s)
2929{
2930 if (data.count() >= QRhiD3D12::MAX_SHADER_CACHE_ENTRIES)
2931 data.clear();
2932 data.insert(key, s);
2933}
2934
2935bool QD3D12ShaderVisibleDescriptorHeap::create(ID3D12Device *device,
2936 D3D12_DESCRIPTOR_HEAP_TYPE type,
2937 quint32 perFrameDescriptorCount)
2938{
2939 Q_ASSERT(type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV || type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
2940
2941 quint32 size = perFrameDescriptorCount * QD3D12_FRAMES_IN_FLIGHT;
2942
2943 // https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-support
2944 const quint32 CBV_SRV_UAV_MAX = 1000000;
2945 const quint32 SAMPLER_MAX = 2048;
2946 if (type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
2947 size = qMin(size, CBV_SRV_UAV_MAX);
2948 else if (type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)
2949 size = qMin(size, SAMPLER_MAX);
2950
2951 if (!heap.create(device, size, type, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)) {
2952 qWarning("Failed to create shader-visible descriptor heap of size %u", size);
2953 return false;
2954 }
2955
2956 perFrameDescriptorCount = size / QD3D12_FRAMES_IN_FLIGHT;
2957 quint32 currentOffsetInDescriptors = 0;
2958 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
2959 perFrameHeapSlice[i].createWithExisting(heap, currentOffsetInDescriptors, perFrameDescriptorCount);
2960 currentOffsetInDescriptors += perFrameDescriptorCount;
2961 }
2962
2963 return true;
2964}
2965
2966void QD3D12ShaderVisibleDescriptorHeap::destroy()
2967{
2968 heap.destroy();
2969}
2970
2971void QD3D12ShaderVisibleDescriptorHeap::destroyWithDeferredRelease(QD3D12ReleaseQueue *releaseQueue)
2972{
2973 heap.destroyWithDeferredRelease(releaseQueue);
2974}
2975
2976static inline std::pair<int, int> mapBinding(int binding, const QShader::NativeResourceBindingMap &map)
2977{
2978 if (map.isEmpty())
2979 return { binding, binding }; // assume 1:1 mapping
2980
2981 auto it = map.constFind(binding);
2982 if (it != map.cend())
2983 return *it;
2984
2985 // Hitting this path is normal too. It is not given that the resource is
2986 // present in the shaders for all the stages specified by the visibility
2987 // mask in the QRhiShaderResourceBinding.
2988 return { -1, -1 };
2989}
2990
2991void QD3D12ShaderResourceVisitor::visit()
2992{
2993 for (int bindingIdx = 0, bindingCount = srb->m_bindings.count(); bindingIdx != bindingCount; ++bindingIdx) {
2994 const QRhiShaderResourceBinding &b(srb->m_bindings[bindingIdx]);
2995 const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b);
2996
2997 for (int stageIdx = 0; stageIdx < stageCount; ++stageIdx) {
2998 const QD3D12ShaderStageData *sd = &stageData[stageIdx];
2999 if (!sd->valid)
3000 continue;
3001
3002 if (!bd->stage.testFlag(qd3d12_stageToSrb(sd->stage)))
3003 continue;
3004
3005 switch (bd->type) {
3006 case QRhiShaderResourceBinding::UniformBuffer:
3007 {
3008 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3009 if (shaderRegister >= 0 && uniformBuffer)
3010 uniformBuffer(sd->stage, bd->u.ubuf, shaderRegister, bd->binding);
3011 }
3012 break;
3013 case QRhiShaderResourceBinding::SampledTexture:
3014 {
3015 Q_ASSERT(bd->u.stex.count > 0);
3016 const int textureBaseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3017 const int samplerBaseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).second;
3018 for (int i = 0; i < bd->u.stex.count; ++i) {
3019 if (textureBaseShaderRegister >= 0 && texture)
3020 texture(sd->stage, bd->u.stex.texSamplers[i], textureBaseShaderRegister + i);
3021 if (samplerBaseShaderRegister >= 0 && sampler)
3022 sampler(sd->stage, bd->u.stex.texSamplers[i], samplerBaseShaderRegister + i);
3023 }
3024 }
3025 break;
3026 case QRhiShaderResourceBinding::Texture:
3027 {
3028 Q_ASSERT(bd->u.stex.count > 0);
3029 const int baseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3030 if (baseShaderRegister >= 0 && texture) {
3031 for (int i = 0; i < bd->u.stex.count; ++i)
3032 texture(sd->stage, bd->u.stex.texSamplers[i], baseShaderRegister + i);
3033 }
3034 }
3035 break;
3036 case QRhiShaderResourceBinding::Sampler:
3037 {
3038 Q_ASSERT(bd->u.stex.count > 0);
3039 const int baseShaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3040 if (baseShaderRegister >= 0 && sampler) {
3041 for (int i = 0; i < bd->u.stex.count; ++i)
3042 sampler(sd->stage, bd->u.stex.texSamplers[i], baseShaderRegister + i);
3043 }
3044 }
3045 break;
3046 case QRhiShaderResourceBinding::ImageLoad:
3047 {
3048 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3049 if (shaderRegister >= 0 && storageImage)
3050 storageImage(sd->stage, bd->u.simage, Load, shaderRegister);
3051 }
3052 break;
3053 case QRhiShaderResourceBinding::ImageStore:
3054 {
3055 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3056 if (shaderRegister >= 0 && storageImage)
3057 storageImage(sd->stage, bd->u.simage, Store, shaderRegister);
3058 }
3059 break;
3060 case QRhiShaderResourceBinding::ImageLoadStore:
3061 {
3062 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3063 if (shaderRegister >= 0 && storageImage)
3064 storageImage(sd->stage, bd->u.simage, LoadStore, shaderRegister);
3065 }
3066 break;
3067 case QRhiShaderResourceBinding::BufferLoad:
3068 {
3069 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3070 if (shaderRegister >= 0 && storageBuffer)
3071 storageBuffer(sd->stage, bd->u.sbuf, Load, shaderRegister);
3072 }
3073 break;
3074 case QRhiShaderResourceBinding::BufferStore:
3075 {
3076 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3077 if (shaderRegister >= 0 && storageBuffer)
3078 storageBuffer(sd->stage, bd->u.sbuf, Store, shaderRegister);
3079 }
3080 break;
3081 case QRhiShaderResourceBinding::BufferLoadStore:
3082 {
3083 const int shaderRegister = mapBinding(bd->binding, sd->nativeResourceBindingMap).first;
3084 if (shaderRegister >= 0 && storageBuffer)
3085 storageBuffer(sd->stage, bd->u.sbuf, LoadStore, shaderRegister);
3086 }
3087 break;
3088 }
3089 }
3090 }
3091}
3092
3093bool QD3D12SamplerManager::create(ID3D12Device *device)
3094{
3095 // This does not need to be per-frame slot, just grab space for MAX_SAMPLERS samplers.
3096 if (!shaderVisibleSamplerHeap.create(device,
3097 D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
3098 MAX_SAMPLERS / QD3D12_FRAMES_IN_FLIGHT))
3099 {
3100 qWarning("Could not create shader-visible SAMPLER heap");
3101 return false;
3102 }
3103
3104 this->device = device;
3105 return true;
3106}
3107
3108void QD3D12SamplerManager::destroy()
3109{
3110 if (device) {
3111 shaderVisibleSamplerHeap.destroy();
3112 device = nullptr;
3113 }
3114}
3115
3116QD3D12Descriptor QD3D12SamplerManager::getShaderVisibleDescriptor(const D3D12_SAMPLER_DESC &desc)
3117{
3118 auto it = gpuMap.constFind({desc});
3119 if (it != gpuMap.cend())
3120 return *it;
3121
3122 QD3D12Descriptor descriptor = shaderVisibleSamplerHeap.heap.get(1);
3123 if (descriptor.isValid()) {
3124 device->CreateSampler(&desc, descriptor.cpuHandle);
3125 gpuMap.insert({desc}, descriptor);
3126 } else {
3127 qWarning("Out of shader-visible SAMPLER descriptor heap space,"
3128 " this should not happen, maximum number of unique samplers is %u",
3129 shaderVisibleSamplerHeap.heap.capacity);
3130 }
3131
3132 return descriptor;
3133}
3134
3135bool QD3D12MipmapGenerator::create(QRhiD3D12 *rhiD)
3136{
3137 this->rhiD = rhiD;
3138
3139 D3D12_ROOT_PARAMETER1 rootParams[3] = {};
3140 D3D12_DESCRIPTOR_RANGE1 descriptorRanges[2] = {};
3141
3142 // b0
3143 rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
3144 rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3145 rootParams[0].Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
3146
3147 // t0
3148 descriptorRanges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
3149 descriptorRanges[0].NumDescriptors = 1;
3150 descriptorRanges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
3151 rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3152 rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3153 rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
3154 rootParams[1].DescriptorTable.pDescriptorRanges = &descriptorRanges[0];
3155
3156 // u0..3
3157 descriptorRanges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
3158 descriptorRanges[1].NumDescriptors = 4;
3159 rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3160 rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3161 rootParams[2].DescriptorTable.NumDescriptorRanges = 1;
3162 rootParams[2].DescriptorTable.pDescriptorRanges = &descriptorRanges[1];
3163
3164 // s0
3165 D3D12_STATIC_SAMPLER_DESC samplerDesc = {};
3166 samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
3167 samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3168 samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3169 samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3170 samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3171
3172 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
3173 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
3174 rsDesc.Desc_1_1.NumParameters = 3;
3175 rsDesc.Desc_1_1.pParameters = rootParams;
3176 rsDesc.Desc_1_1.NumStaticSamplers = 1;
3177 rsDesc.Desc_1_1.pStaticSamplers = &samplerDesc;
3178
3179 ID3DBlob *signature = nullptr;
3180 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
3181 if (FAILED(hr)) {
3182 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
3183 return false;
3184 }
3185 ID3D12RootSignature *rootSig = nullptr;
3186 hr = rhiD->dev->CreateRootSignature(0,
3187 signature->GetBufferPointer(),
3188 signature->GetBufferSize(),
3189 __uuidof(ID3D12RootSignature),
3190 reinterpret_cast<void **>(&rootSig));
3191 signature->Release();
3192 if (FAILED(hr)) {
3193 qWarning("Failed to create root signature: %s",
3194 qPrintable(QSystemError::windowsComString(hr)));
3195 return false;
3196 }
3197
3198 rootSigHandle = QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
3199
3200 D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
3201 psoDesc.pRootSignature = rootSig;
3202 psoDesc.CS.pShaderBytecode = g_csMipmap;
3203 psoDesc.CS.BytecodeLength = sizeof(g_csMipmap);
3204 ID3D12PipelineState *pso = nullptr;
3205 hr = rhiD->dev->CreateComputePipelineState(&psoDesc,
3206 __uuidof(ID3D12PipelineState),
3207 reinterpret_cast<void **>(&pso));
3208 if (FAILED(hr)) {
3209 qWarning("Failed to create compute pipeline state: %s",
3210 qPrintable(QSystemError::windowsComString(hr)));
3211 rhiD->rootSignaturePool.remove(rootSigHandle);
3212 rootSigHandle = {};
3213 return false;
3214 }
3215
3216 pipelineHandle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
3217
3218 return true;
3219}
3220
3221void QD3D12MipmapGenerator::destroy()
3222{
3223 rhiD->pipelinePool.remove(pipelineHandle);
3224 pipelineHandle = {};
3225 rhiD->rootSignaturePool.remove(rootSigHandle);
3226 rootSigHandle = {};
3227}
3228
3229void QD3D12MipmapGenerator::generate(QD3D12CommandBuffer *cbD, const QD3D12ObjectHandle &textureHandle)
3230{
3231 QD3D12Pipeline *pipeline = rhiD->pipelinePool.lookupRef(pipelineHandle);
3232 if (!pipeline)
3233 return;
3234 QD3D12RootSignature *rootSig = rhiD->rootSignaturePool.lookupRef(rootSigHandle);
3235 if (!rootSig)
3236 return;
3237 QD3D12Resource *res = rhiD->resourcePool.lookupRef(textureHandle);
3238 if (!res)
3239 return;
3240
3241 const quint32 mipLevelCount = res->desc.MipLevels;
3242 if (mipLevelCount < 2)
3243 return;
3244
3245 if (res->desc.SampleDesc.Count > 1) {
3246 qWarning("Cannot generate mipmaps for MSAA texture");
3247 return;
3248 }
3249
3250 const bool is1D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D;
3251 if (is1D) {
3252 qWarning("Cannot generate mipmaps for 1D texture");
3253 return;
3254 }
3255
3256 const bool is3D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D;
3257 const bool isCubeOrArray = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D
3258 && res->desc.DepthOrArraySize > 1;
3259 const quint32 layerCount = isCubeOrArray ? res->desc.DepthOrArraySize : 1;
3260
3261 if (is3D) {
3262 qWarning("2D mipmap generator invoked for 3D texture, this should not happen");
3263 return;
3264 }
3265
3266 rhiD->barrierGen.addTransitionBarrier(textureHandle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3267 rhiD->barrierGen.enqueueBufferedTransitionBarriers(cbD);
3268
3269 cbD->cmdList->SetPipelineState(pipeline->pso);
3270 cbD->cmdList->SetComputeRootSignature(rootSig->rootSig);
3271
3272 const quint32 descriptorByteSize = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].descriptorByteSize;
3273
3274 struct CBufData {
3275 quint32 srcMipLevel;
3276 quint32 numMipLevels;
3277 float texelWidth;
3278 float texelHeight;
3279 };
3280
3281 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(sizeof(CBufData), mipLevelCount * layerCount);
3282 std::optional<QD3D12StagingArea> ownStagingArea;
3283 if (rhiD->smallStagingAreas[rhiD->currentFrameSlot].remainingCapacity() < allocSize) {
3284 ownStagingArea = QD3D12StagingArea();
3285 if (!ownStagingArea->create(rhiD, allocSize, D3D12_HEAP_TYPE_UPLOAD)) {
3286 qWarning("Could not create staging area for mipmap generation");
3287 return;
3288 }
3289 }
3290 QD3D12StagingArea *workArea = ownStagingArea.has_value()
3291 ? &ownStagingArea.value()
3292 : &rhiD->smallStagingAreas[rhiD->currentFrameSlot];
3293
3294 bool gotNewHeap = false;
3295 if (!rhiD->ensureShaderVisibleDescriptorHeapCapacity(&rhiD->shaderVisibleCbvSrvUavHeap,
3296 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
3297 rhiD->currentFrameSlot,
3298 (1 + 4) * mipLevelCount * layerCount,
3299 &gotNewHeap))
3300 {
3301 qWarning("Could not ensure enough space in descriptor heap for mipmap generation");
3302 return;
3303 }
3304 if (gotNewHeap)
3305 rhiD->bindShaderVisibleHeaps(cbD);
3306
3307 for (quint32 layer = 0; layer < layerCount; ++layer) {
3308 for (quint32 level = 0; level < mipLevelCount ;) {
3309 UINT subresource = calcSubresource(level, layer, res->desc.MipLevels);
3310 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3311 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3312 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
3313
3314 quint32 levelPlusOneMipWidth = res->desc.Width >> (level + 1);
3315 quint32 levelPlusOneMipHeight = res->desc.Height >> (level + 1);
3316 const quint32 dw = levelPlusOneMipWidth == 1 ? levelPlusOneMipHeight : levelPlusOneMipWidth;
3317 const quint32 dh = levelPlusOneMipHeight == 1 ? levelPlusOneMipWidth : levelPlusOneMipHeight;
3318 // number of times the size can be halved while still resulting in an even dimension
3319 const quint32 additionalMips = qCountTrailingZeroBits(dw | dh);
3320 const quint32 numGenMips = qMin(1u + qMin(3u, additionalMips), res->desc.MipLevels - level);
3321 levelPlusOneMipWidth = qMax(1u, levelPlusOneMipWidth);
3322 levelPlusOneMipHeight = qMax(1u, levelPlusOneMipHeight);
3323
3324 CBufData cbufData = {
3325 level,
3326 numGenMips,
3327 1.0f / float(levelPlusOneMipWidth),
3328 1.0f / float(levelPlusOneMipHeight)
3329 };
3330
3331 QD3D12StagingArea::Allocation cbuf = workArea->get(sizeof(cbufData));
3332 memcpy(cbuf.p, &cbufData, sizeof(cbufData));
3333 cbD->cmdList->SetComputeRootConstantBufferView(0, cbuf.gpuAddr);
3334
3335 QD3D12Descriptor srv = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3336 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
3337 srvDesc.Format = res->desc.Format;
3338 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
3339 if (isCubeOrArray) {
3340 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
3341 srvDesc.Texture2DArray.MipLevels = res->desc.MipLevels;
3342 srvDesc.Texture2DArray.FirstArraySlice = layer;
3343 srvDesc.Texture2DArray.ArraySize = 1;
3344 } else {
3345 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
3346 srvDesc.Texture2D.MipLevels = res->desc.MipLevels;
3347 }
3348 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
3349 cbD->cmdList->SetComputeRootDescriptorTable(1, srv.gpuHandle);
3350
3351 QD3D12Descriptor uavStart = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(4);
3352 D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = uavStart.cpuHandle;
3353 // if level is N, then need UAVs for levels N+1, ..., N+4
3354 for (quint32 uavIdx = 0; uavIdx < 4; ++uavIdx) {
3355 const quint32 uavMipLevel = qMin(level + 1u + uavIdx, res->desc.MipLevels - 1u);
3356 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
3357 uavDesc.Format = res->desc.Format;
3358 if (isCubeOrArray) {
3359 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
3360 uavDesc.Texture2DArray.MipSlice = uavMipLevel;
3361 uavDesc.Texture2DArray.FirstArraySlice = layer;
3362 uavDesc.Texture2DArray.ArraySize = 1;
3363 } else {
3364 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
3365 uavDesc.Texture2D.MipSlice = uavMipLevel;
3366 }
3367 rhiD->dev->CreateUnorderedAccessView(res->resource, nullptr, &uavDesc, uavCpuHandle);
3368 uavCpuHandle.ptr += descriptorByteSize;
3369 }
3370 cbD->cmdList->SetComputeRootDescriptorTable(2, uavStart.gpuHandle);
3371
3372 cbD->cmdList->Dispatch(levelPlusOneMipWidth, levelPlusOneMipHeight, 1);
3373
3374 rhiD->barrierGen.enqueueUavBarrier(cbD, textureHandle);
3375 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3376 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
3377 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3378
3379 level += numGenMips;
3380 }
3381 }
3382
3383 if (ownStagingArea.has_value())
3384 ownStagingArea->destroyWithDeferredRelease(&rhiD->releaseQueue);
3385}
3386
3387bool QD3D12MipmapGenerator3D::create(QRhiD3D12 *rhiD)
3388{
3389 this->rhiD = rhiD;
3390
3391 D3D12_ROOT_PARAMETER1 rootParams[3] = {};
3392 D3D12_DESCRIPTOR_RANGE1 descriptorRanges[2] = {};
3393
3394 // b0
3395 rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
3396 rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3397 rootParams[0].Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
3398
3399 // t0
3400 descriptorRanges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
3401 descriptorRanges[0].NumDescriptors = 1;
3402 descriptorRanges[0].Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
3403 rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3404 rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3405 rootParams[1].DescriptorTable.NumDescriptorRanges = 1;
3406 rootParams[1].DescriptorTable.pDescriptorRanges = &descriptorRanges[0];
3407
3408 // u0
3409 descriptorRanges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
3410 descriptorRanges[1].NumDescriptors = 1;
3411 rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
3412 rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3413 rootParams[2].DescriptorTable.NumDescriptorRanges = 1;
3414 rootParams[2].DescriptorTable.pDescriptorRanges = &descriptorRanges[1];
3415
3416 // s0
3417 D3D12_STATIC_SAMPLER_DESC samplerDesc = {};
3418 samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
3419 samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3420 samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3421 samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
3422 samplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
3423
3424 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
3425 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
3426 rsDesc.Desc_1_1.NumParameters = 3;
3427 rsDesc.Desc_1_1.pParameters = rootParams;
3428 rsDesc.Desc_1_1.NumStaticSamplers = 1;
3429 rsDesc.Desc_1_1.pStaticSamplers = &samplerDesc;
3430
3431 ID3DBlob *signature = nullptr;
3432 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
3433 if (FAILED(hr)) {
3434 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
3435 return false;
3436 }
3437 ID3D12RootSignature *rootSig = nullptr;
3438 hr = rhiD->dev->CreateRootSignature(0,
3439 signature->GetBufferPointer(),
3440 signature->GetBufferSize(),
3441 __uuidof(ID3D12RootSignature),
3442 reinterpret_cast<void **>(&rootSig));
3443 signature->Release();
3444 if (FAILED(hr)) {
3445 qWarning("Failed to create root signature: %s",
3446 qPrintable(QSystemError::windowsComString(hr)));
3447 return false;
3448 }
3449
3450 rootSigHandle = QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
3451
3452 D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {};
3453 psoDesc.pRootSignature = rootSig;
3454 psoDesc.CS.pShaderBytecode = g_csMipmap3D;
3455 psoDesc.CS.BytecodeLength = sizeof(g_csMipmap3D);
3456 ID3D12PipelineState *pso = nullptr;
3457 hr = rhiD->dev->CreateComputePipelineState(&psoDesc,
3458 __uuidof(ID3D12PipelineState),
3459 reinterpret_cast<void **>(&pso));
3460 if (FAILED(hr)) {
3461 qWarning("Failed to create compute pipeline state: %s",
3462 qPrintable(QSystemError::windowsComString(hr)));
3463 rhiD->rootSignaturePool.remove(rootSigHandle);
3464 rootSigHandle = {};
3465 return false;
3466 }
3467
3468 pipelineHandle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
3469
3470 return true;
3471}
3472
3473void QD3D12MipmapGenerator3D::destroy()
3474{
3475 rhiD->pipelinePool.remove(pipelineHandle);
3476 pipelineHandle = {};
3477 rhiD->rootSignaturePool.remove(rootSigHandle);
3478 rootSigHandle = {};
3479}
3480
3481void QD3D12MipmapGenerator3D::generate(QD3D12CommandBuffer *cbD, const QD3D12ObjectHandle &textureHandle)
3482{
3483 QD3D12Pipeline *pipeline = rhiD->pipelinePool.lookupRef(pipelineHandle);
3484 if (!pipeline)
3485 return;
3486 QD3D12RootSignature *rootSig = rhiD->rootSignaturePool.lookupRef(rootSigHandle);
3487 if (!rootSig)
3488 return;
3489 QD3D12Resource *res = rhiD->resourcePool.lookupRef(textureHandle);
3490 if (!res)
3491 return;
3492
3493 const quint32 mipLevelCount = res->desc.MipLevels;
3494 if (mipLevelCount < 2)
3495 return;
3496
3497 const bool is3D = res->desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D;
3498 if (!is3D) {
3499 qWarning("3D mipmap generator invoked for non-3D texture, this should not happen");
3500 return;
3501 }
3502
3503 rhiD->barrierGen.addTransitionBarrier(textureHandle, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3504 rhiD->barrierGen.enqueueBufferedTransitionBarriers(cbD);
3505
3506 cbD->cmdList->SetPipelineState(pipeline->pso);
3507 cbD->cmdList->SetComputeRootSignature(rootSig->rootSig);
3508
3509 const quint32 descriptorByteSize = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].descriptorByteSize;
3510
3511 struct CBufData {
3512 float texelWidth;
3513 float texelHeight;
3514 float texelDepth;
3515 quint32 srcMipLevel;
3516 };
3517
3518 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(sizeof(CBufData), mipLevelCount);
3519 std::optional<QD3D12StagingArea> ownStagingArea;
3520 if (rhiD->smallStagingAreas[rhiD->currentFrameSlot].remainingCapacity() < allocSize) {
3521 ownStagingArea = QD3D12StagingArea();
3522 if (!ownStagingArea->create(rhiD, allocSize, D3D12_HEAP_TYPE_UPLOAD)) {
3523 qWarning("Could not create staging area for mipmap generation");
3524 return;
3525 }
3526 }
3527 QD3D12StagingArea *workArea = ownStagingArea.has_value()
3528 ? &ownStagingArea.value()
3529 : &rhiD->smallStagingAreas[rhiD->currentFrameSlot];
3530
3531 bool gotNewHeap = false;
3532 if (!rhiD->ensureShaderVisibleDescriptorHeapCapacity(&rhiD->shaderVisibleCbvSrvUavHeap,
3533 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
3534 rhiD->currentFrameSlot,
3535 (1 + 1) * mipLevelCount, // 1 SRV + 1 UAV
3536 &gotNewHeap))
3537 {
3538 qWarning("Could not ensure enough space in descriptor heap for mipmap generation");
3539 return;
3540 }
3541 if (gotNewHeap)
3542 rhiD->bindShaderVisibleHeaps(cbD);
3543
3544 for (quint32 level = 0; level < mipLevelCount; ++level) {
3545 UINT subresource = calcSubresource(level, 0u, res->desc.MipLevels);
3546 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3547 D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
3548 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
3549
3550 quint32 levelPlusOneMipWidth = qMax<quint32>(1, res->desc.Width >> (level + 1));
3551 quint32 levelPlusOneMipHeight = qMax<quint32>(1, res->desc.Height >> (level + 1));
3552 quint32 levelPlusOneMipDepth = qMax<quint32>(1, res->desc.DepthOrArraySize >> (level + 1));
3553
3554 CBufData cbufData = {
3555 1.0f / float(levelPlusOneMipWidth),
3556 1.0f / float(levelPlusOneMipHeight),
3557 1.0f / float(levelPlusOneMipDepth),
3558 quint32(level)
3559 };
3560
3561 QD3D12StagingArea::Allocation cbuf = workArea->get(sizeof(cbufData));
3562 memcpy(cbuf.p, &cbufData, sizeof(cbufData));
3563 cbD->cmdList->SetComputeRootConstantBufferView(0, cbuf.gpuAddr);
3564
3565 QD3D12Descriptor srv = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3566 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
3567 srvDesc.Format = res->desc.Format;
3568 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
3569 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
3570 srvDesc.Texture3D.MipLevels = res->desc.MipLevels;
3571
3572 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
3573 cbD->cmdList->SetComputeRootDescriptorTable(1, srv.gpuHandle);
3574
3575 QD3D12Descriptor uavStart = rhiD->shaderVisibleCbvSrvUavHeap.perFrameHeapSlice[rhiD->currentFrameSlot].get(1);
3576 D3D12_CPU_DESCRIPTOR_HANDLE uavCpuHandle = uavStart.cpuHandle;
3577 const quint32 uavMipLevel = qMin(level + 1u, res->desc.MipLevels - 1u);
3578 D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {};
3579 uavDesc.Format = res->desc.Format;
3580 uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
3581 uavDesc.Texture3D.MipSlice = uavMipLevel;
3582 uavDesc.Texture3D.WSize = UINT(-1);
3583 rhiD->dev->CreateUnorderedAccessView(res->resource, nullptr, &uavDesc, uavCpuHandle);
3584 uavCpuHandle.ptr += descriptorByteSize;
3585 cbD->cmdList->SetComputeRootDescriptorTable(2, uavStart.gpuHandle);
3586
3587 cbD->cmdList->Dispatch(levelPlusOneMipWidth, levelPlusOneMipHeight, levelPlusOneMipDepth);
3588
3589 rhiD->barrierGen.enqueueUavBarrier(cbD, textureHandle);
3590 rhiD->barrierGen.enqueueSubresourceTransitionBarrier(cbD, textureHandle, subresource,
3591 D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
3592 D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
3593 }
3594
3595 if (ownStagingArea.has_value())
3596 ownStagingArea->destroyWithDeferredRelease(&rhiD->releaseQueue);
3597}
3598
3599bool QD3D12MemoryAllocator::create(ID3D12Device *device, IDXGIAdapter1 *adapter)
3600{
3601 this->device = device;
3602
3603 // We can function with and without D3D12MA: CreateCommittedResource is
3604 // just fine for our purposes and not any complicated API-wise; the memory
3605 // allocator is interesting for efficiency mainly since it can suballocate
3606 // instead of making everything a committed resource allocation.
3607
3608 static bool disableMA = qEnvironmentVariableIntValue("QT_D3D_NO_SUBALLOC");
3609 if (disableMA)
3610 return true;
3611
3612 DXGI_ADAPTER_DESC1 desc;
3613 adapter->GetDesc1(&desc);
3614 if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
3615 return true;
3616
3617 D3D12MA::ALLOCATOR_DESC allocatorDesc = {};
3618 allocatorDesc.pDevice = device;
3619 allocatorDesc.pAdapter = adapter;
3620 // A QRhi is supposed to be used from one single thread only. Disable
3621 // the allocator's own mutexes. This may give a performance boost.
3622 allocatorDesc.Flags = D3D12MA::ALLOCATOR_FLAG_SINGLETHREADED;
3623 HRESULT hr = D3D12MA::CreateAllocator(&allocatorDesc, &allocator);
3624 if (FAILED(hr)) {
3625 qWarning("Failed to initialize D3D12 Memory Allocator: %s",
3626 qPrintable(QSystemError::windowsComString(hr)));
3627 return false;
3628 }
3629 return true;
3630}
3631
3632void QD3D12MemoryAllocator::destroy()
3633{
3634 if (allocator) {
3635 allocator->Release();
3636 allocator = nullptr;
3637 }
3638}
3639
3640HRESULT QD3D12MemoryAllocator::createResource(D3D12_HEAP_TYPE heapType,
3641 const D3D12_RESOURCE_DESC *resourceDesc,
3642 D3D12_RESOURCE_STATES initialState,
3643 const D3D12_CLEAR_VALUE *optimizedClearValue,
3644 D3D12MA::Allocation **maybeAllocation,
3645 REFIID riidResource,
3646 void **ppvResource)
3647{
3648 if (allocator) {
3649 D3D12MA::ALLOCATION_DESC allocDesc = {};
3650 allocDesc.HeapType = heapType;
3651 return allocator->CreateResource(&allocDesc,
3652 resourceDesc,
3653 initialState,
3654 optimizedClearValue,
3655 maybeAllocation,
3656 riidResource,
3657 ppvResource);
3658 } else {
3659 *maybeAllocation = nullptr;
3660 D3D12_HEAP_PROPERTIES heapProps = {};
3661 heapProps.Type = heapType;
3662 return device->CreateCommittedResource(&heapProps,
3663 D3D12_HEAP_FLAG_NONE,
3664 resourceDesc,
3665 initialState,
3666 optimizedClearValue,
3667 riidResource,
3668 ppvResource);
3669 }
3670}
3671
3672void QD3D12MemoryAllocator::getBudget(D3D12MA::Budget *localBudget, D3D12MA::Budget *nonLocalBudget)
3673{
3674 if (allocator) {
3675 allocator->GetBudget(localBudget, nonLocalBudget);
3676 } else {
3677 *localBudget = {};
3678 *nonLocalBudget = {};
3679 }
3680}
3681
3682void QRhiD3D12::waitGpu()
3683{
3684 fullFenceCounter += 1u;
3685 if (SUCCEEDED(cmdQueue->Signal(fullFence, fullFenceCounter))) {
3686 if (SUCCEEDED(fullFence->SetEventOnCompletion(fullFenceCounter, fullFenceEvent)))
3687 WaitForSingleObject(fullFenceEvent, INFINITE);
3688 }
3689}
3690
3691DXGI_SAMPLE_DESC QRhiD3D12::effectiveSampleDesc(int sampleCount, DXGI_FORMAT format) const
3692{
3693 DXGI_SAMPLE_DESC desc;
3694 desc.Count = 1;
3695 desc.Quality = 0;
3696
3697 const int s = effectiveSampleCount(sampleCount);
3698
3699 if (s > 1) {
3700 D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msaaInfo = {};
3701 msaaInfo.Format = format;
3702 msaaInfo.SampleCount = UINT(s);
3703 if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msaaInfo, sizeof(msaaInfo)))) {
3704 if (msaaInfo.NumQualityLevels > 0) {
3705 desc.Count = UINT(s);
3706 desc.Quality = msaaInfo.NumQualityLevels - 1;
3707 } else {
3708 qWarning("No quality levels for multisampling with sample count %d", s);
3709 }
3710 }
3711 }
3712
3713 return desc;
3714}
3715
3716bool QRhiD3D12::startCommandListForCurrentFrameSlot(D3D12GraphicsCommandList **cmdList)
3717{
3718 ID3D12CommandAllocator *cmdAlloc = cmdAllocators[currentFrameSlot];
3719 if (!*cmdList) {
3720 HRESULT hr = dev->CreateCommandList(0,
3721 D3D12_COMMAND_LIST_TYPE_DIRECT,
3722 cmdAlloc,
3723 nullptr,
3724 __uuidof(D3D12GraphicsCommandList),
3725 reinterpret_cast<void **>(cmdList));
3726 if (FAILED(hr)) {
3727 qWarning("Failed to create command list: %s", qPrintable(QSystemError::windowsComString(hr)));
3728 return false;
3729 }
3730 } else {
3731 HRESULT hr = (*cmdList)->Reset(cmdAlloc, nullptr);
3732 if (FAILED(hr)) {
3733 qWarning("Failed to reset command list: %s", qPrintable(QSystemError::windowsComString(hr)));
3734 return false;
3735 }
3736 }
3737 return true;
3738}
3739
3740static inline QRhiTexture::Format swapchainReadbackTextureFormat(DXGI_FORMAT format, QRhiTexture::Flags *flags)
3741{
3742 switch (format) {
3743 case DXGI_FORMAT_R8G8B8A8_UNORM:
3744 return QRhiTexture::RGBA8;
3745 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
3746 if (flags)
3747 (*flags) |= QRhiTexture::sRGB;
3748 return QRhiTexture::RGBA8;
3749 case DXGI_FORMAT_B8G8R8A8_UNORM:
3750 return QRhiTexture::BGRA8;
3751 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
3752 if (flags)
3753 (*flags) |= QRhiTexture::sRGB;
3754 return QRhiTexture::BGRA8;
3755 case DXGI_FORMAT_R16G16B16A16_FLOAT:
3756 return QRhiTexture::RGBA16F;
3757 case DXGI_FORMAT_R32G32B32A32_FLOAT:
3758 return QRhiTexture::RGBA32F;
3759 case DXGI_FORMAT_R10G10B10A2_UNORM:
3760 return QRhiTexture::RGB10A2;
3761 default:
3762 qWarning("DXGI_FORMAT %d cannot be read back", format);
3763 break;
3764 }
3765 return QRhiTexture::UnknownFormat;
3766}
3767
3768void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpdateBatch *resourceUpdates)
3769{
3770 QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates);
3771
3772 for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) {
3773 const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]);
3774 if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
3775 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3776 Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
3777 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
3778 if (u.offset == 0 && u.data.size() == bufD->m_size)
3779 bufD->pendingHostWrites[i].clear();
3780 bufD->pendingHostWrites[i].append({ u.offset, u.data });
3781 }
3782 } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) {
3783 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3784 Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
3785 Q_ASSERT(u.offset + u.data.size() <= bufD->m_size);
3786
3787 // The general approach to staging upload data is to first try
3788 // using the per-frame "small" staging area, which is a very simple
3789 // linear allocator; if that's not big enough then create a
3790 // dedicated StagingArea and then deferred-release it to make sure
3791 // if stays alive while the frame is possibly still in flight.
3792
3793 QD3D12StagingArea::Allocation stagingAlloc;
3794 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(bufD->m_size, 1);
3795 if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize)
3796 stagingAlloc = smallStagingAreas[currentFrameSlot].get(bufD->m_size);
3797
3798 std::optional<QD3D12StagingArea> ownStagingArea;
3799 if (!stagingAlloc.isValid()) {
3800 ownStagingArea = QD3D12StagingArea();
3801 if (!ownStagingArea->create(this, allocSize, D3D12_HEAP_TYPE_UPLOAD))
3802 continue;
3803 stagingAlloc = ownStagingArea->get(allocSize);
3804 if (!stagingAlloc.isValid()) {
3805 ownStagingArea->destroy();
3806 continue;
3807 }
3808 }
3809
3810 memcpy(stagingAlloc.p + u.offset, u.data.constData(), u.data.size());
3811
3812 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_COPY_DEST);
3813 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3814
3815 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
3816 cbD->cmdList->CopyBufferRegion(res->resource,
3817 u.offset,
3818 stagingAlloc.buffer,
3819 stagingAlloc.bufferOffset + u.offset,
3820 u.data.size());
3821 }
3822
3823 if (ownStagingArea.has_value())
3824 ownStagingArea->destroyWithDeferredRelease(&releaseQueue);
3825 } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) {
3826 QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, u.buf);
3827 if (bufD->m_type == QRhiBuffer::Dynamic) {
3828 bufD->executeHostWritesForFrameSlot(currentFrameSlot);
3829 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[currentFrameSlot])) {
3830 Q_ASSERT(res->cpuMapPtr);
3831 u.result->data.resize(u.readSize);
3832 memcpy(u.result->data.data(), reinterpret_cast<char *>(res->cpuMapPtr) + u.offset, u.readSize);
3833 }
3834 if (u.result->completed)
3835 u.result->completed();
3836 } else {
3837 QD3D12Readback readback;
3838 readback.frameSlot = currentFrameSlot;
3839 readback.result = u.result;
3840 readback.byteSize = u.readSize;
3841 const quint32 allocSize = aligned(u.readSize, QD3D12StagingArea::ALIGNMENT);
3842 if (!readback.staging.create(this, allocSize, D3D12_HEAP_TYPE_READBACK)) {
3843 if (u.result->completed)
3844 u.result->completed();
3845 continue;
3846 }
3847 QD3D12StagingArea::Allocation stagingAlloc = readback.staging.get(u.readSize);
3848 if (!stagingAlloc.isValid()) {
3849 readback.staging.destroy();
3850 if (u.result->completed)
3851 u.result->completed();
3852 continue;
3853 }
3854 Q_ASSERT(stagingAlloc.bufferOffset == 0);
3855 barrierGen.addTransitionBarrier(bufD->handles[0], D3D12_RESOURCE_STATE_COPY_SOURCE);
3856 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3857 if (QD3D12Resource *res = resourcePool.lookupRef(bufD->handles[0])) {
3858 cbD->cmdList->CopyBufferRegion(stagingAlloc.buffer, 0, res->resource, u.offset, u.readSize);
3859 activeReadbacks.append(readback);
3860 } else {
3861 readback.staging.destroy();
3862 if (u.result->completed)
3863 u.result->completed();
3864 }
3865 }
3866 }
3867 }
3868
3869 for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
3870 const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]);
3871 if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) {
3872 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.dst);
3873 const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
3874 QD3D12Resource *res = resourcePool.lookupRef(texD->handle);
3875 if (!res)
3876 continue;
3877 barrierGen.addTransitionBarrier(texD->handle, D3D12_RESOURCE_STATE_COPY_DEST);
3878 barrierGen.enqueueBufferedTransitionBarriers(cbD);
3879 for (int layer = 0, maxLayer = u.subresDesc.size(); layer < maxLayer; ++layer) {
3880 for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
3881 for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) {
3882 D3D12_SUBRESOURCE_FOOTPRINT footprint = {};
3883 footprint.Format = res->desc.Format;
3884 footprint.Depth = 1;
3885 quint32 totalBytes = 0;
3886
3887 const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize)
3888 : subresDesc.sourceSize();
3889 const QPoint srcPos = subresDesc.sourceTopLeft();
3890 QPoint dstPos = subresDesc.destinationTopLeft();
3891
3892 if (!subresDesc.image().isNull()) {
3893 const QImage img = subresDesc.image();
3894 const int bpl = img.bytesPerLine();
3895 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3896 totalBytes = footprint.RowPitch * img.height();
3897 } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) {
3898 QSize blockDim;
3899 quint32 bpl = 0;
3900 compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim);
3901 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3902 const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height();
3903 totalBytes = footprint.RowPitch * rowCount;
3904 } else if (!subresDesc.data().isEmpty()) {
3905 quint32 bpl = 0;
3906 if (subresDesc.dataStride())
3907 bpl = subresDesc.dataStride();
3908 else
3909 textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr);
3910 footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
3911 totalBytes = footprint.RowPitch * subresSize.height();
3912 } else {
3913 qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level);
3914 continue;
3915 }
3916
3917 const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(totalBytes, 1);
3918 QD3D12StagingArea::Allocation stagingAlloc;
3919 if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize)
3920 stagingAlloc = smallStagingAreas[currentFrameSlot].get(allocSize);
3921
3922 std::optional<QD3D12StagingArea> ownStagingArea;
3923 if (!stagingAlloc.isValid()) {
3924 ownStagingArea = QD3D12StagingArea();
3925 if (!ownStagingArea->create(this, allocSize, D3D12_HEAP_TYPE_UPLOAD))
3926 continue;
3927 stagingAlloc = ownStagingArea->get(allocSize);
3928 if (!stagingAlloc.isValid()) {
3929 ownStagingArea->destroy();
3930 continue;
3931 }
3932 }
3933
3934 D3D12_TEXTURE_COPY_LOCATION dst;
3935 dst.pResource = res->resource;
3936 dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
3937 dst.SubresourceIndex = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount);
3938 D3D12_TEXTURE_COPY_LOCATION src;
3939 src.pResource = stagingAlloc.buffer;
3940 src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
3941 src.PlacedFootprint.Offset = stagingAlloc.bufferOffset;
3942
3943 D3D12_BOX srcBox; // back, right, bottom are exclusive
3944
3945 if (!subresDesc.image().isNull()) {
3946 const QImage img = subresDesc.image();
3947 const int bpc = qMax(1, img.depth() / 8);
3948 const int bpl = img.bytesPerLine();
3949
3950 QSize size = subresDesc.sourceSize().isEmpty() ? img.size() : subresDesc.sourceSize();
3951 size.setWidth(qMin(size.width(), img.width() - srcPos.x()));
3952 size.setHeight(qMin(size.height(), img.height() - srcPos.y()));
3953 size = clampedSubResourceUploadSize(size, dstPos, level, texD->m_pixelSize);
3954
3955 footprint.Width = size.width();
3956 footprint.Height = size.height();
3957
3958 srcBox.left = 0;
3959 srcBox.top = 0;
3960 srcBox.right = UINT(size.width());
3961 srcBox.bottom = UINT(size.height());
3962 srcBox.front = 0;
3963 srcBox.back = 1;
3964
3965 const uchar *imgPtr = img.constBits();
3966 const quint32 lineBytes = size.width() * bpc;
3967 for (int y = 0, h = size.height(); y < h; ++y) {
3968 memcpy(stagingAlloc.p + y * footprint.RowPitch,
3969 imgPtr + srcPos.x() * bpc + (y + srcPos.y()) * bpl,
3970 lineBytes);
3971 }
3972 } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) {
3973 QSize blockDim;
3974 quint32 bpl = 0;
3975 compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim);
3976 // x and y must be multiples of the block width and height
3977 dstPos.setX(aligned(dstPos.x(), blockDim.width()));
3978 dstPos.setY(aligned(dstPos.y(), blockDim.height()));
3979
3980 srcBox.left = 0;
3981 srcBox.top = 0;
3982 // width and height must be multiples of the block width and height
3983 srcBox.right = aligned(subresSize.width(), blockDim.width());
3984 srcBox.bottom = aligned(subresSize.height(), blockDim.height());
3985
3986 srcBox.front = 0;
3987 srcBox.back = 1;
3988
3989 footprint.Width = aligned(subresSize.width(), blockDim.width());
3990 footprint.Height = aligned(subresSize.height(), blockDim.height());
3991
3992 const quint32 copyBytes = qMin(bpl, footprint.RowPitch);
3993 const QByteArray imgData = subresDesc.data();
3994 const char *imgPtr = imgData.constData();
3995 const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height();
3996 for (int y = 0; y < rowCount; ++y)
3997 memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes);
3998 } else if (!subresDesc.data().isEmpty()) {
3999 srcBox.left = 0;
4000 srcBox.top = 0;
4001 srcBox.right = subresSize.width();
4002 srcBox.bottom = subresSize.height();
4003 srcBox.front = 0;
4004 srcBox.back = 1;
4005
4006 footprint.Width = subresSize.width();
4007 footprint.Height = subresSize.height();
4008
4009 quint32 bpl = 0;
4010 if (subresDesc.dataStride())
4011 bpl = subresDesc.dataStride();
4012 else
4013 textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr);
4014
4015 const quint32 copyBytes = qMin(bpl, footprint.RowPitch);
4016 const QByteArray data = subresDesc.data();
4017 const char *imgPtr = data.constData();
4018 for (int y = 0, h = subresSize.height(); y < h; ++y)
4019 memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes);
4020 }
4021
4022 src.PlacedFootprint.Footprint = footprint;
4023
4024 cbD->cmdList->CopyTextureRegion(&dst,
4025 UINT(dstPos.x()),
4026 UINT(dstPos.y()),
4027 is3D ? UINT(layer) : 0u,
4028 &src,
4029 &srcBox);
4030
4031 if (ownStagingArea.has_value())
4032 ownStagingArea->destroyWithDeferredRelease(&releaseQueue);
4033 }
4034 }
4035 }
4036 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Copy) {
4037 Q_ASSERT(u.src && u.dst);
4038 QD3D12Texture *srcD = QRHI_RES(QD3D12Texture, u.src);
4039 QD3D12Texture *dstD = QRHI_RES(QD3D12Texture, u.dst);
4040 const bool srcIs3D = srcD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4041 const bool dstIs3D = dstD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4042 QD3D12Resource *srcRes = resourcePool.lookupRef(srcD->handle);
4043 QD3D12Resource *dstRes = resourcePool.lookupRef(dstD->handle);
4044 if (!srcRes || !dstRes)
4045 continue;
4046
4047 barrierGen.addTransitionBarrier(srcD->handle, D3D12_RESOURCE_STATE_COPY_SOURCE);
4048 barrierGen.addTransitionBarrier(dstD->handle, D3D12_RESOURCE_STATE_COPY_DEST);
4049 barrierGen.enqueueBufferedTransitionBarriers(cbD);
4050
4051 const UINT srcSubresource = calcSubresource(UINT(u.desc.sourceLevel()),
4052 srcIs3D ? 0u : UINT(u.desc.sourceLayer()),
4053 srcD->mipLevelCount);
4054 const UINT dstSubresource = calcSubresource(UINT(u.desc.destinationLevel()),
4055 dstIs3D ? 0u : UINT(u.desc.destinationLayer()),
4056 dstD->mipLevelCount);
4057 const QPoint dp = u.desc.destinationTopLeft();
4058 const QSize mipSize = q->sizeForMipLevel(u.desc.sourceLevel(), srcD->m_pixelSize);
4059 const QSize copySize = u.desc.pixelSize().isEmpty() ? mipSize : u.desc.pixelSize();
4060 const QPoint sp = u.desc.sourceTopLeft();
4061
4062 D3D12_BOX srcBox;
4063 srcBox.left = UINT(sp.x());
4064 srcBox.top = UINT(sp.y());
4065 srcBox.front = srcIs3D ? UINT(u.desc.sourceLayer()) : 0u;
4066 // back, right, bottom are exclusive
4067 srcBox.right = srcBox.left + UINT(copySize.width());
4068 srcBox.bottom = srcBox.top + UINT(copySize.height());
4069 srcBox.back = srcBox.front + 1;
4070
4071 D3D12_TEXTURE_COPY_LOCATION src;
4072 src.pResource = srcRes->resource;
4073 src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4074 src.SubresourceIndex = srcSubresource;
4075 D3D12_TEXTURE_COPY_LOCATION dst;
4076 dst.pResource = dstRes->resource;
4077 dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4078 dst.SubresourceIndex = dstSubresource;
4079
4080 cbD->cmdList->CopyTextureRegion(&dst,
4081 UINT(dp.x()),
4082 UINT(dp.y()),
4083 dstIs3D ? UINT(u.desc.destinationLayer()) : 0u,
4084 &src,
4085 &srcBox);
4086 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Read) {
4087 QD3D12Readback readback;
4088 readback.frameSlot = currentFrameSlot;
4089 readback.result = u.result;
4090
4091 QD3D12ObjectHandle srcHandle;
4092 QRect rect;
4093 bool is3D = false;
4094 if (u.rb.texture()) {
4095 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.rb.texture());
4096 if (texD->sampleDesc.Count > 1) {
4097 qWarning("Multisample texture cannot be read back");
4098 continue;
4099 }
4100 is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
4101 if (u.rb.rect().isValid())
4102 rect = u.rb.rect();
4103 else
4104 rect = QRect({0, 0}, q->sizeForMipLevel(u.rb.level(), texD->m_pixelSize));
4105 readback.format = texD->m_format;
4106 srcHandle = texD->handle;
4107 } else {
4108 Q_ASSERT(currentSwapChain);
4109 if (u.rb.rect().isValid())
4110 rect = u.rb.rect();
4111 else
4112 rect = QRect({0, 0}, currentSwapChain->pixelSize);
4113 readback.format = swapchainReadbackTextureFormat(currentSwapChain->colorFormat, nullptr);
4114 if (readback.format == QRhiTexture::UnknownFormat)
4115 continue;
4116 srcHandle = currentSwapChain->colorBuffers[currentSwapChain->currentBackBufferIndex];
4117 }
4118 readback.pixelSize = rect.size();
4119
4120 textureFormatInfo(readback.format,
4121 readback.pixelSize,
4122 &readback.bytesPerLine,
4123 &readback.byteSize,
4124 nullptr);
4125
4126 QD3D12Resource *srcRes = resourcePool.lookupRef(srcHandle);
4127 if (!srcRes)
4128 continue;
4129
4130 const UINT subresource = calcSubresource(UINT(u.rb.level()),
4131 is3D ? 0u : UINT(u.rb.layer()),
4132 srcRes->desc.MipLevels);
4133 D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout;
4134 // totalBytes is what we get from D3D, with the 256 aligned stride,
4135 // readback.byteSize is the final result that's not relevant here yet
4136 UINT64 totalBytes = 0;
4137 dev->GetCopyableFootprints(&srcRes->desc, subresource, 1, 0,
4138 &layout, nullptr, nullptr, &totalBytes);
4139 readback.stagingRowPitch = layout.Footprint.RowPitch;
4140
4141 const quint32 allocSize = aligned<quint32>(totalBytes, QD3D12StagingArea::ALIGNMENT);
4142 if (!readback.staging.create(this, allocSize, D3D12_HEAP_TYPE_READBACK)) {
4143 if (u.result->completed)
4144 u.result->completed();
4145 continue;
4146 }
4147 QD3D12StagingArea::Allocation stagingAlloc = readback.staging.get(totalBytes);
4148 if (!stagingAlloc.isValid()) {
4149 readback.staging.destroy();
4150 if (u.result->completed)
4151 u.result->completed();
4152 continue;
4153 }
4154 Q_ASSERT(stagingAlloc.bufferOffset == 0);
4155
4156 barrierGen.addTransitionBarrier(srcHandle, D3D12_RESOURCE_STATE_COPY_SOURCE);
4157 barrierGen.enqueueBufferedTransitionBarriers(cbD);
4158
4159 D3D12_TEXTURE_COPY_LOCATION dst;
4160 dst.pResource = stagingAlloc.buffer;
4161 dst.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
4162 dst.PlacedFootprint.Offset = 0;
4163 dst.PlacedFootprint.Footprint = layout.Footprint;
4164
4165 D3D12_TEXTURE_COPY_LOCATION src;
4166 src.pResource = srcRes->resource;
4167 src.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
4168 src.SubresourceIndex = subresource;
4169
4170 D3D12_BOX srcBox = {};
4171 srcBox.left = UINT(rect.left());
4172 srcBox.top = UINT(rect.top());
4173 srcBox.front = is3D ? UINT(u.rb.layer()) : 0u;
4174 // back, right, bottom are exclusive
4175 srcBox.right = srcBox.left + UINT(rect.width());
4176 srcBox.bottom = srcBox.top + UINT(rect.height());
4177 srcBox.back = srcBox.front + 1;
4178
4179 cbD->cmdList->CopyTextureRegion(&dst, 0, 0, 0, &src, &srcBox);
4180 activeReadbacks.append(readback);
4181 } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::GenMips) {
4182 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, u.dst);
4183 Q_ASSERT(texD->flags().testFlag(QRhiTexture::UsedWithGenerateMips));
4184 if (texD->flags().testFlag(QRhiTexture::ThreeDimensional))
4185 mipmapGen3D.generate(cbD, texD->handle);
4186 else
4187 mipmapGen.generate(cbD, texD->handle);
4188 }
4189 }
4190
4191 ud->free();
4192}
4193
4194void QRhiD3D12::finishActiveReadbacks(bool forced)
4195{
4196 QVarLengthArray<std::function<void()>, 4> completedCallbacks;
4197
4198 for (int i = activeReadbacks.size() - 1; i >= 0; --i) {
4199 QD3D12Readback &readback(activeReadbacks[i]);
4200 if (forced || currentFrameSlot == readback.frameSlot || readback.frameSlot < 0) {
4201 readback.result->format = readback.format;
4202 readback.result->pixelSize = readback.pixelSize;
4203 readback.result->data.resize(int(readback.byteSize));
4204
4205 if (readback.format != QRhiTexture::UnknownFormat) {
4206 quint8 *dstPtr = reinterpret_cast<quint8 *>(readback.result->data.data());
4207 const quint8 *srcPtr = readback.staging.mem.p;
4208 const quint32 lineSize = qMin(readback.bytesPerLine, readback.stagingRowPitch);
4209 for (int y = 0, h = readback.pixelSize.height(); y < h; ++y)
4210 memcpy(dstPtr + y * readback.bytesPerLine, srcPtr + y * readback.stagingRowPitch, lineSize);
4211 } else {
4212 memcpy(readback.result->data.data(), readback.staging.mem.p, readback.byteSize);
4213 }
4214
4215 readback.staging.destroy();
4216
4217 if (readback.result->completed)
4218 completedCallbacks.append(readback.result->completed);
4219
4220 activeReadbacks.remove(i);
4221 }
4222 }
4223
4224 for (auto f : completedCallbacks)
4225 f();
4226}
4227
4228bool QRhiD3D12::ensureShaderVisibleDescriptorHeapCapacity(QD3D12ShaderVisibleDescriptorHeap *h,
4229 D3D12_DESCRIPTOR_HEAP_TYPE type,
4230 int frameSlot,
4231 quint32 neededDescriptorCount,
4232 bool *gotNew)
4233{
4234 // Gets a new heap if needed. Note that the capacity we get is clamped
4235 // automatically (e.g. to 1 million, or 2048 for samplers), so * 2 does not
4236 // mean we can grow indefinitely, then again even using the same size would
4237 // work (because we what we are after here is a new heap for the rest of
4238 // the commands, not affecting what's already recorded).
4239 if (h->perFrameHeapSlice[frameSlot].remainingCapacity() < neededDescriptorCount) {
4240 const quint32 newPerFrameSize = qMax(h->perFrameHeapSlice[frameSlot].capacity * 2,
4241 neededDescriptorCount);
4242 QD3D12ShaderVisibleDescriptorHeap newHeap;
4243 if (!newHeap.create(dev, type, newPerFrameSize)) {
4244 qWarning("Could not create new shader-visible descriptor heap");
4245 return false;
4246 }
4247 h->destroyWithDeferredRelease(&releaseQueue);
4248 *h = newHeap;
4249 *gotNew = true;
4250 }
4251 return true;
4252}
4253
4254void QRhiD3D12::bindShaderVisibleHeaps(QD3D12CommandBuffer *cbD)
4255{
4256 ID3D12DescriptorHeap *heaps[] = {
4257 shaderVisibleCbvSrvUavHeap.heap.heap,
4258 samplerMgr.shaderVisibleSamplerHeap.heap.heap
4259 };
4260 cbD->cmdList->SetDescriptorHeaps(2, heaps);
4261}
4262
4263QD3D12Buffer::QD3D12Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size)
4264 : QRhiBuffer(rhi, type, usage, size)
4265{
4266}
4267
4268QD3D12Buffer::~QD3D12Buffer()
4269{
4270 destroy();
4271}
4272
4273void QD3D12Buffer::destroy()
4274{
4275 if (handles[0].isNull())
4276 return;
4277
4278 QRHI_RES_RHI(QRhiD3D12);
4279
4280 // destroy() implementations, unlike other functions, are expected to test
4281 // for m_rhi (rhiD) being null, to allow surviving in case one attempts to
4282 // destroy a (leaked) resource after the QRhi.
4283 //
4284 // If there is no QRhi anymore, we do not deferred-release but that's fine
4285 // since the QRhi already released everything that was in the resourcePool.
4286
4287 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4288 if (rhiD)
4289 rhiD->releaseQueue.deferredReleaseResource(handles[i]);
4290 handles[i] = {};
4291 pendingHostWrites[i].clear();
4292 }
4293
4294 if (rhiD)
4295 rhiD->unregisterResource(this);
4296}
4297
4298bool QD3D12Buffer::create()
4299{
4300 if (!handles[0].isNull())
4301 destroy();
4302
4303 if (m_usage.testFlag(QRhiBuffer::UniformBuffer) && m_type != Dynamic) {
4304 qWarning("UniformBuffer must always be Dynamic");
4305 return false;
4306 }
4307
4308 if (m_usage.testFlag(QRhiBuffer::StorageBuffer) && m_type == Dynamic) {
4309 qWarning("StorageBuffer cannot be combined with Dynamic");
4310 return false;
4311 }
4312
4313 const quint32 nonZeroSize = m_size <= 0 ? 256 : m_size;
4314 const quint32 roundedSize = aligned(nonZeroSize, m_usage.testFlag(QRhiBuffer::UniformBuffer) ? 256u : 4u);
4315
4316 UINT resourceFlags = D3D12_RESOURCE_FLAG_NONE;
4317 if (m_usage.testFlag(QRhiBuffer::StorageBuffer))
4318 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
4319
4320 QRHI_RES_RHI(QRhiD3D12);
4321 HRESULT hr = 0;
4322 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4323 if (i == 0 || m_type == Dynamic) {
4324 D3D12_RESOURCE_DESC resourceDesc = {};
4325 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
4326 resourceDesc.Width = roundedSize;
4327 resourceDesc.Height = 1;
4328 resourceDesc.DepthOrArraySize = 1;
4329 resourceDesc.MipLevels = 1;
4330 resourceDesc.Format = DXGI_FORMAT_UNKNOWN;
4331 resourceDesc.SampleDesc = { 1, 0 };
4332 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
4333 resourceDesc.Flags = D3D12_RESOURCE_FLAGS(resourceFlags);
4334 ID3D12Resource *resource = nullptr;
4335 D3D12MA::Allocation *allocation = nullptr;
4336 // Dynamic == host (CPU) visible
4337 D3D12_HEAP_TYPE heapType = m_type == Dynamic
4338 ? D3D12_HEAP_TYPE_UPLOAD
4339 : D3D12_HEAP_TYPE_DEFAULT;
4340 D3D12_RESOURCE_STATES resourceState = m_type == Dynamic
4341 ? D3D12_RESOURCE_STATE_GENERIC_READ
4342 : D3D12_RESOURCE_STATE_COMMON;
4343 hr = rhiD->vma.createResource(heapType,
4344 &resourceDesc,
4345 resourceState,
4346 nullptr,
4347 &allocation,
4348 __uuidof(resource),
4349 reinterpret_cast<void **>(&resource));
4350 if (FAILED(hr))
4351 break;
4352 if (!m_objectName.isEmpty()) {
4353 QString decoratedName = QString::fromUtf8(m_objectName);
4354 if (m_type == Dynamic) {
4355 decoratedName += QLatin1Char('/');
4356 decoratedName += QString::number(i);
4357 }
4358 resource->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16()));
4359 }
4360 void *cpuMemPtr = nullptr;
4361 if (m_type == Dynamic) {
4362 // will be mapped for ever on the CPU, this makes future host write operations very simple
4363 hr = resource->Map(0, nullptr, &cpuMemPtr);
4364 if (FAILED(hr)) {
4365 qWarning("Map() failed to dynamic buffer");
4366 resource->Release();
4367 if (allocation)
4368 allocation->Release();
4369 break;
4370 }
4371 }
4372 handles[i] = QD3D12Resource::addToPool(&rhiD->resourcePool,
4373 resource,
4374 resourceState,
4375 allocation,
4376 cpuMemPtr);
4377 }
4378 }
4379 if (FAILED(hr)) {
4380 qWarning("Failed to create buffer: '%s' Type was %d, size was %u, using D3D12MA was %d.",
4381 qPrintable(QSystemError::windowsComString(hr)),
4382 int(m_type),
4383 roundedSize,
4384 int(rhiD->vma.isUsingD3D12MA()));
4385 return false;
4386 }
4387
4388 rhiD->registerResource(this);
4389 return true;
4390}
4391
4392QRhiBuffer::NativeBuffer QD3D12Buffer::nativeBuffer()
4393{
4394 NativeBuffer b;
4395 Q_ASSERT(sizeof(b.objects) / sizeof(b.objects[0]) >= size_t(QD3D12_FRAMES_IN_FLIGHT));
4396 QRHI_RES_RHI(QRhiD3D12);
4397 if (m_type == Dynamic) {
4398 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
4399 executeHostWritesForFrameSlot(i);
4400 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[i]))
4401 b.objects[i] = res->resource;
4402 else
4403 b.objects[i] = nullptr;
4404 }
4405 b.slotCount = QD3D12_FRAMES_IN_FLIGHT;
4406 return b;
4407 }
4408 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[0]))
4409 b.objects[0] = res->resource;
4410 else
4411 b.objects[0] = nullptr;
4412 b.slotCount = 1;
4413 return b;
4414}
4415
4416char *QD3D12Buffer::beginFullDynamicBufferUpdateForCurrentFrame()
4417{
4418 // Shortcut the entire buffer update mechanism and allow the client to do
4419 // the host writes directly to the buffer. This will lead to unexpected
4420 // results when combined with QRhiResourceUpdateBatch-based updates for the
4421 // buffer, but provides a fast path for dynamic buffers that have all their
4422 // content changed in every frame.
4423
4424 Q_ASSERT(m_type == Dynamic);
4425 QRHI_RES_RHI(QRhiD3D12);
4426 Q_ASSERT(rhiD->inFrame);
4427 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[rhiD->currentFrameSlot]))
4428 return static_cast<char *>(res->cpuMapPtr);
4429
4430 return nullptr;
4431}
4432
4433void QD3D12Buffer::endFullDynamicBufferUpdateForCurrentFrame()
4434{
4435 // nothing to do here
4436}
4437
4438void QD3D12Buffer::executeHostWritesForFrameSlot(int frameSlot)
4439{
4440 if (pendingHostWrites[frameSlot].isEmpty())
4441 return;
4442
4443 Q_ASSERT(m_type == QRhiBuffer::Dynamic);
4444 QRHI_RES_RHI(QRhiD3D12);
4445 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handles[frameSlot])) {
4446 Q_ASSERT(res->cpuMapPtr);
4447 for (const QD3D12Buffer::HostWrite &u : std::as_const(pendingHostWrites[frameSlot]))
4448 memcpy(static_cast<char *>(res->cpuMapPtr) + u.offset, u.data.constData(), u.data.size());
4449 }
4450 pendingHostWrites[frameSlot].clear();
4451}
4452
4453static inline DXGI_FORMAT toD3DTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags)
4454{
4455 const bool srgb = flags.testFlag(QRhiTexture::sRGB);
4456 switch (format) {
4457 case QRhiTexture::RGBA8:
4458 return srgb ? DXGI_FORMAT_R8G8B8A8_UNORM_SRGB : DXGI_FORMAT_R8G8B8A8_UNORM;
4459 case QRhiTexture::BGRA8:
4460 return srgb ? DXGI_FORMAT_B8G8R8A8_UNORM_SRGB : DXGI_FORMAT_B8G8R8A8_UNORM;
4461 case QRhiTexture::R8:
4462 return DXGI_FORMAT_R8_UNORM;
4463 case QRhiTexture::R8SI:
4464 return DXGI_FORMAT_R8_SINT;
4465 case QRhiTexture::R8UI:
4466 return DXGI_FORMAT_R8_UINT;
4467 case QRhiTexture::RG8:
4468 return DXGI_FORMAT_R8G8_UNORM;
4469 case QRhiTexture::R16:
4470 return DXGI_FORMAT_R16_UNORM;
4471 case QRhiTexture::RG16:
4472 return DXGI_FORMAT_R16G16_UNORM;
4473 case QRhiTexture::RED_OR_ALPHA8:
4474 return DXGI_FORMAT_R8_UNORM;
4475
4476 case QRhiTexture::RGBA16F:
4477 return DXGI_FORMAT_R16G16B16A16_FLOAT;
4478 case QRhiTexture::RGBA32F:
4479 return DXGI_FORMAT_R32G32B32A32_FLOAT;
4480 case QRhiTexture::R16F:
4481 return DXGI_FORMAT_R16_FLOAT;
4482 case QRhiTexture::R32F:
4483 return DXGI_FORMAT_R32_FLOAT;
4484
4485 case QRhiTexture::RGB10A2:
4486 return DXGI_FORMAT_R10G10B10A2_UNORM;
4487
4488 case QRhiTexture::R32SI:
4489 return DXGI_FORMAT_R32_SINT;
4490 case QRhiTexture::R32UI:
4491 return DXGI_FORMAT_R32_UINT;
4492 case QRhiTexture::RG32SI:
4493 return DXGI_FORMAT_R32G32_SINT;
4494 case QRhiTexture::RG32UI:
4495 return DXGI_FORMAT_R32G32_UINT;
4496 case QRhiTexture::RGBA32SI:
4497 return DXGI_FORMAT_R32G32B32A32_SINT;
4498 case QRhiTexture::RGBA32UI:
4499 return DXGI_FORMAT_R32G32B32A32_UINT;
4500
4501 case QRhiTexture::D16:
4502 return DXGI_FORMAT_R16_TYPELESS;
4503 case QRhiTexture::D24:
4504 return DXGI_FORMAT_R24G8_TYPELESS;
4505 case QRhiTexture::D24S8:
4506 return DXGI_FORMAT_R24G8_TYPELESS;
4507 case QRhiTexture::D32F:
4508 return DXGI_FORMAT_R32_TYPELESS;
4509 case QRhiTexture::Format::D32FS8:
4510 return DXGI_FORMAT_R32G8X24_TYPELESS;
4511
4512 case QRhiTexture::BC1:
4513 return srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM;
4514 case QRhiTexture::BC2:
4515 return srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM;
4516 case QRhiTexture::BC3:
4517 return srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM;
4518 case QRhiTexture::BC4:
4519 return DXGI_FORMAT_BC4_UNORM;
4520 case QRhiTexture::BC5:
4521 return DXGI_FORMAT_BC5_UNORM;
4522 case QRhiTexture::BC6H:
4523 return DXGI_FORMAT_BC6H_UF16;
4524 case QRhiTexture::BC7:
4525 return srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM;
4526
4527 case QRhiTexture::ETC2_RGB8:
4528 case QRhiTexture::ETC2_RGB8A1:
4529 case QRhiTexture::ETC2_RGBA8:
4530 qWarning("QRhiD3D12 does not support ETC2 textures");
4531 return DXGI_FORMAT_R8G8B8A8_UNORM;
4532
4533 case QRhiTexture::ASTC_4x4:
4534 case QRhiTexture::ASTC_5x4:
4535 case QRhiTexture::ASTC_5x5:
4536 case QRhiTexture::ASTC_6x5:
4537 case QRhiTexture::ASTC_6x6:
4538 case QRhiTexture::ASTC_8x5:
4539 case QRhiTexture::ASTC_8x6:
4540 case QRhiTexture::ASTC_8x8:
4541 case QRhiTexture::ASTC_10x5:
4542 case QRhiTexture::ASTC_10x6:
4543 case QRhiTexture::ASTC_10x8:
4544 case QRhiTexture::ASTC_10x10:
4545 case QRhiTexture::ASTC_12x10:
4546 case QRhiTexture::ASTC_12x12:
4547 qWarning("QRhiD3D12 does not support ASTC textures");
4548 return DXGI_FORMAT_R8G8B8A8_UNORM;
4549
4550 default:
4551 break;
4552 }
4553 return DXGI_FORMAT_R8G8B8A8_UNORM;
4554}
4555
4556QD3D12RenderBuffer::QD3D12RenderBuffer(QRhiImplementation *rhi,
4557 Type type,
4558 const QSize &pixelSize,
4559 int sampleCount,
4560 Flags flags,
4561 QRhiTexture::Format backingFormatHint)
4562 : QRhiRenderBuffer(rhi, type, pixelSize, sampleCount, flags, backingFormatHint)
4563{
4564}
4565
4566QD3D12RenderBuffer::~QD3D12RenderBuffer()
4567{
4568 destroy();
4569}
4570
4571void QD3D12RenderBuffer::destroy()
4572{
4573 if (handle.isNull())
4574 return;
4575
4576 QRHI_RES_RHI(QRhiD3D12);
4577 if (rhiD) {
4578 if (rtv.isValid())
4579 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->rtvPool, rtv, 1);
4580 else if (dsv.isValid())
4581 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->dsvPool, dsv, 1);
4582 }
4583
4584 handle = {};
4585 rtv = {};
4586 dsv = {};
4587
4588 if (rhiD)
4589 rhiD->unregisterResource(this);
4590}
4591
4592bool QD3D12RenderBuffer::create()
4593{
4594 if (!handle.isNull())
4595 destroy();
4596
4597 if (m_pixelSize.isEmpty())
4598 return false;
4599
4600 QRHI_RES_RHI(QRhiD3D12);
4601
4602 switch (m_type) {
4603 case QRhiRenderBuffer::Color:
4604 {
4605 dxgiFormat = toD3DTextureFormat(backingFormat(), {});
4606 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4607 D3D12_RESOURCE_DESC resourceDesc = {};
4608 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
4609 resourceDesc.Width = UINT64(m_pixelSize.width());
4610 resourceDesc.Height = UINT(m_pixelSize.height());
4611 resourceDesc.DepthOrArraySize = 1;
4612 resourceDesc.MipLevels = 1;
4613 resourceDesc.Format = dxgiFormat;
4614 resourceDesc.SampleDesc = sampleDesc;
4615 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
4616 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
4617 D3D12_CLEAR_VALUE clearValue = {};
4618 clearValue.Format = dxgiFormat;
4619 // have a separate allocation and resource object (meaning both will need its own Release())
4620 ID3D12Resource *resource = nullptr;
4621 D3D12MA::Allocation *allocation = nullptr;
4622 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
4623 &resourceDesc,
4624 D3D12_RESOURCE_STATE_RENDER_TARGET,
4625 &clearValue,
4626 &allocation,
4627 __uuidof(ID3D12Resource),
4628 reinterpret_cast<void **>(&resource));
4629 if (FAILED(hr)) {
4630 qWarning("Failed to create color buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
4631 return false;
4632 }
4633 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_RENDER_TARGET, allocation);
4634 rtv = rhiD->rtvPool.allocate(1);
4635 if (!rtv.isValid())
4636 return false;
4637 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
4638 rtvDesc.Format = dxgiFormat;
4639 rtvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS
4640 : D3D12_RTV_DIMENSION_TEXTURE2D;
4641 rhiD->dev->CreateRenderTargetView(resource, &rtvDesc, rtv.cpuHandle);
4642 }
4643 break;
4644 case QRhiRenderBuffer::DepthStencil:
4645 {
4646 dxgiFormat = DS_FORMAT;
4647 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4648 D3D12_RESOURCE_DESC resourceDesc = {};
4649 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
4650 resourceDesc.Width = UINT64(m_pixelSize.width());
4651 resourceDesc.Height = UINT(m_pixelSize.height());
4652 resourceDesc.DepthOrArraySize = 1;
4653 resourceDesc.MipLevels = 1;
4654 resourceDesc.Format = dxgiFormat;
4655 resourceDesc.SampleDesc = sampleDesc;
4656 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
4657 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
4658 if (m_flags.testFlag(UsedWithSwapChainOnly))
4659 resourceDesc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
4660 D3D12_CLEAR_VALUE clearValue = {};
4661 clearValue.Format = dxgiFormat;
4662 clearValue.DepthStencil.Depth = 1.0f;
4663 clearValue.DepthStencil.Stencil = 0;
4664 ID3D12Resource *resource = nullptr;
4665 D3D12MA::Allocation *allocation = nullptr;
4666 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
4667 &resourceDesc,
4668 D3D12_RESOURCE_STATE_DEPTH_WRITE,
4669 &clearValue,
4670 &allocation,
4671 __uuidof(ID3D12Resource),
4672 reinterpret_cast<void **>(&resource));
4673 if (FAILED(hr)) {
4674 qWarning("Failed to create depth-stencil buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
4675 return false;
4676 }
4677 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_DEPTH_WRITE, allocation);
4678 dsv = rhiD->dsvPool.allocate(1);
4679 if (!dsv.isValid())
4680 return false;
4681 D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {};
4682 dsvDesc.Format = dxgiFormat;
4683 dsvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS
4684 : D3D12_DSV_DIMENSION_TEXTURE2D;
4685 rhiD->dev->CreateDepthStencilView(resource, &dsvDesc, dsv.cpuHandle);
4686 }
4687 break;
4688 }
4689
4690 if (!m_objectName.isEmpty()) {
4691 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle)) {
4692 const QString name = QString::fromUtf8(m_objectName);
4693 res->resource->SetName(reinterpret_cast<LPCWSTR>(name.utf16()));
4694 }
4695 }
4696
4697 generation += 1;
4698 rhiD->registerResource(this);
4699 return true;
4700}
4701
4702QRhiTexture::Format QD3D12RenderBuffer::backingFormat() const
4703{
4704 if (m_backingFormatHint != QRhiTexture::UnknownFormat)
4705 return m_backingFormatHint;
4706 else
4707 return m_type == Color ? QRhiTexture::RGBA8 : QRhiTexture::UnknownFormat;
4708}
4709
4710QD3D12Texture::QD3D12Texture(QRhiImplementation *rhi, Format format, const QSize &pixelSize, int depth,
4711 int arraySize, int sampleCount, Flags flags)
4712 : QRhiTexture(rhi, format, pixelSize, depth, arraySize, sampleCount, flags)
4713{
4714}
4715
4716QD3D12Texture::~QD3D12Texture()
4717{
4718 destroy();
4719}
4720
4721void QD3D12Texture::destroy()
4722{
4723 if (handle.isNull())
4724 return;
4725
4726 QRHI_RES_RHI(QRhiD3D12);
4727 if (rhiD)
4728 rhiD->releaseQueue.deferredReleaseResourceWithViews(handle, &rhiD->cbvSrvUavPool, srv, 1);
4729
4730 handle = {};
4731 srv = {};
4732
4733 if (rhiD)
4734 rhiD->unregisterResource(this);
4735}
4736
4737static inline DXGI_FORMAT toD3DDepthTextureSRVFormat(QRhiTexture::Format format)
4738{
4739 switch (format) {
4740 case QRhiTexture::Format::D16:
4741 return DXGI_FORMAT_R16_FLOAT;
4742 case QRhiTexture::Format::D24:
4743 return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
4744 case QRhiTexture::Format::D24S8:
4745 return DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
4746 case QRhiTexture::Format::D32F:
4747 return DXGI_FORMAT_R32_FLOAT;
4748 case QRhiTexture::Format::D32FS8:
4749 return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
4750 default:
4751 break;
4752 }
4753 Q_UNREACHABLE_RETURN(DXGI_FORMAT_R32_FLOAT);
4754}
4755
4756static inline DXGI_FORMAT toD3DDepthTextureDSVFormat(QRhiTexture::Format format)
4757{
4758 // here the result cannot be typeless
4759 switch (format) {
4760 case QRhiTexture::Format::D16:
4761 return DXGI_FORMAT_D16_UNORM;
4762 case QRhiTexture::Format::D24:
4763 return DXGI_FORMAT_D24_UNORM_S8_UINT;
4764 case QRhiTexture::Format::D24S8:
4765 return DXGI_FORMAT_D24_UNORM_S8_UINT;
4766 case QRhiTexture::Format::D32F:
4767 return DXGI_FORMAT_D32_FLOAT;
4768 case QRhiTexture::Format::D32FS8:
4769 return DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
4770 default:
4771 break;
4772 }
4773 Q_UNREACHABLE_RETURN(DXGI_FORMAT_D32_FLOAT);
4774}
4775
4776static inline bool isDepthTextureFormat(QRhiTexture::Format format)
4777{
4778 switch (format) {
4779 case QRhiTexture::Format::D16:
4780 case QRhiTexture::Format::D24:
4781 case QRhiTexture::Format::D24S8:
4782 case QRhiTexture::Format::D32F:
4783 case QRhiTexture::Format::D32FS8:
4784 return true;
4785 default:
4786 return false;
4787 }
4788}
4789
4790bool QD3D12Texture::prepareCreate(QSize *adjustedSize)
4791{
4792 if (!handle.isNull())
4793 destroy();
4794
4795 QRHI_RES_RHI(QRhiD3D12);
4796 if (!rhiD->isTextureFormatSupported(m_format, m_flags))
4797 return false;
4798
4799 const bool isDepth = isDepthTextureFormat(m_format);
4800 const bool isCube = m_flags.testFlag(CubeMap);
4801 const bool is3D = m_flags.testFlag(ThreeDimensional);
4802 const bool isArray = m_flags.testFlag(TextureArray);
4803 const bool hasMipMaps = m_flags.testFlag(MipMapped);
4804 const bool is1D = m_flags.testFlag(OneDimensional);
4805
4806 const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1)
4807 : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize);
4808
4809 dxgiFormat = toD3DTextureFormat(m_format, m_flags);
4810 if (isDepth) {
4811 srvFormat = toD3DDepthTextureSRVFormat(m_format);
4812 rtFormat = toD3DDepthTextureDSVFormat(m_format);
4813 } else {
4814 srvFormat = dxgiFormat;
4815 rtFormat = dxgiFormat;
4816 }
4817 if (m_writeViewFormat.format != UnknownFormat) {
4818 if (isDepth)
4819 rtFormat = toD3DDepthTextureDSVFormat(m_writeViewFormat.format);
4820 else
4821 rtFormat = toD3DTextureFormat(m_writeViewFormat.format, m_writeViewFormat.srgb ? sRGB : Flags());
4822 }
4823 if (m_readViewFormat.format != UnknownFormat) {
4824 if (isDepth)
4825 srvFormat = toD3DDepthTextureSRVFormat(m_readViewFormat.format);
4826 else
4827 srvFormat = toD3DTextureFormat(m_readViewFormat.format, m_readViewFormat.srgb ? sRGB : Flags());
4828 }
4829
4830 mipLevelCount = uint(hasMipMaps ? rhiD->q->mipLevelsForSize(size) : 1);
4831 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat);
4832 if (sampleDesc.Count > 1) {
4833 if (isCube) {
4834 qWarning("Cubemap texture cannot be multisample");
4835 return false;
4836 }
4837 if (is3D) {
4838 qWarning("3D texture cannot be multisample");
4839 return false;
4840 }
4841 if (hasMipMaps) {
4842 qWarning("Multisample texture cannot have mipmaps");
4843 return false;
4844 }
4845 }
4846 if (isDepth && hasMipMaps) {
4847 qWarning("Depth texture cannot have mipmaps");
4848 return false;
4849 }
4850 if (isCube && is3D) {
4851 qWarning("Texture cannot be both cube and 3D");
4852 return false;
4853 }
4854 if (isArray && is3D) {
4855 qWarning("Texture cannot be both array and 3D");
4856 return false;
4857 }
4858 if (isCube && is1D) {
4859 qWarning("Texture cannot be both cube and 1D");
4860 return false;
4861 }
4862 if (is1D && is3D) {
4863 qWarning("Texture cannot be both 1D and 3D");
4864 return false;
4865 }
4866 if (m_depth > 1 && !is3D) {
4867 qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth);
4868 return false;
4869 }
4870 if (m_arraySize > 0 && !isArray) {
4871 qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize);
4872 return false;
4873 }
4874 if (m_arraySize < 1 && isArray) {
4875 qWarning("Texture is an array but array size is %d", m_arraySize);
4876 return false;
4877 }
4878
4879 if (adjustedSize)
4880 *adjustedSize = size;
4881
4882 return true;
4883}
4884
4885bool QD3D12Texture::finishCreate()
4886{
4887 QRHI_RES_RHI(QRhiD3D12);
4888 const bool isCube = m_flags.testFlag(CubeMap);
4889 const bool is3D = m_flags.testFlag(ThreeDimensional);
4890 const bool isArray = m_flags.testFlag(TextureArray);
4891 const bool is1D = m_flags.testFlag(OneDimensional);
4892
4893 D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
4894 srvDesc.Format = srvFormat;
4895 srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
4896
4897 if (isCube) {
4898 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
4899 srvDesc.TextureCube.MipLevels = mipLevelCount;
4900 } else {
4901 if (is1D) {
4902 if (isArray) {
4903 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
4904 srvDesc.Texture1DArray.MipLevels = mipLevelCount;
4905 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4906 srvDesc.Texture1DArray.FirstArraySlice = UINT(m_arrayRangeStart);
4907 srvDesc.Texture1DArray.ArraySize = UINT(m_arrayRangeLength);
4908 } else {
4909 srvDesc.Texture1DArray.FirstArraySlice = 0;
4910 srvDesc.Texture1DArray.ArraySize = UINT(qMax(0, m_arraySize));
4911 }
4912 } else {
4913 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
4914 srvDesc.Texture1D.MipLevels = mipLevelCount;
4915 }
4916 } else if (isArray) {
4917 if (sampleDesc.Count > 1) {
4918 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
4919 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4920 srvDesc.Texture2DMSArray.FirstArraySlice = UINT(m_arrayRangeStart);
4921 srvDesc.Texture2DMSArray.ArraySize = UINT(m_arrayRangeLength);
4922 } else {
4923 srvDesc.Texture2DMSArray.FirstArraySlice = 0;
4924 srvDesc.Texture2DMSArray.ArraySize = UINT(qMax(0, m_arraySize));
4925 }
4926 } else {
4927 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
4928 srvDesc.Texture2DArray.MipLevels = mipLevelCount;
4929 if (m_arrayRangeStart >= 0 && m_arrayRangeLength >= 0) {
4930 srvDesc.Texture2DArray.FirstArraySlice = UINT(m_arrayRangeStart);
4931 srvDesc.Texture2DArray.ArraySize = UINT(m_arrayRangeLength);
4932 } else {
4933 srvDesc.Texture2DArray.FirstArraySlice = 0;
4934 srvDesc.Texture2DArray.ArraySize = UINT(qMax(0, m_arraySize));
4935 }
4936 }
4937 } else {
4938 if (sampleDesc.Count > 1) {
4939 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
4940 } else if (is3D) {
4941 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
4942 srvDesc.Texture3D.MipLevels = mipLevelCount;
4943 } else {
4944 srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
4945 srvDesc.Texture2D.MipLevels = mipLevelCount;
4946 }
4947 }
4948 }
4949
4950 srv = rhiD->cbvSrvUavPool.allocate(1);
4951 if (!srv.isValid())
4952 return false;
4953
4954 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle)) {
4955 rhiD->dev->CreateShaderResourceView(res->resource, &srvDesc, srv.cpuHandle);
4956 if (!m_objectName.isEmpty()) {
4957 const QString name = QString::fromUtf8(m_objectName);
4958 res->resource->SetName(reinterpret_cast<LPCWSTR>(name.utf16()));
4959 }
4960 } else {
4961 return false;
4962 }
4963
4964 generation += 1;
4965 return true;
4966}
4967
4968bool QD3D12Texture::create()
4969{
4970 QSize size;
4971 if (!prepareCreate(&size))
4972 return false;
4973
4974 const bool isDepth = isDepthTextureFormat(m_format);
4975 const bool isCube = m_flags.testFlag(CubeMap);
4976 const bool is3D = m_flags.testFlag(ThreeDimensional);
4977 const bool isArray = m_flags.testFlag(TextureArray);
4978 const bool is1D = m_flags.testFlag(OneDimensional);
4979
4980 QRHI_RES_RHI(QRhiD3D12);
4981
4982 bool needsOptimizedClearValueSpecified = false;
4983 UINT resourceFlags = 0;
4984 if (m_flags.testFlag(RenderTarget) || sampleDesc.Count > 1) {
4985 if (isDepth)
4986 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL;
4987 else
4988 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
4989 needsOptimizedClearValueSpecified = true;
4990 }
4991 if (m_flags.testFlag(UsedWithGenerateMips)) {
4992 if (isDepth) {
4993 qWarning("Depth texture cannot have mipmaps generated");
4994 return false;
4995 }
4996 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
4997 }
4998 if (m_flags.testFlag(UsedWithLoadStore))
4999 resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
5000
5001 D3D12_RESOURCE_DESC resourceDesc = {};
5002 resourceDesc.Dimension = is1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D
5003 : (is3D ? D3D12_RESOURCE_DIMENSION_TEXTURE3D
5004 : D3D12_RESOURCE_DIMENSION_TEXTURE2D);
5005 resourceDesc.Width = UINT64(size.width());
5006 resourceDesc.Height = UINT(size.height());
5007 resourceDesc.DepthOrArraySize = isCube ? 6
5008 : (isArray ? UINT(qMax(0, m_arraySize))
5009 : (is3D ? qMax(1, m_depth)
5010 : 1));
5011 resourceDesc.MipLevels = mipLevelCount;
5012 resourceDesc.Format = dxgiFormat;
5013 resourceDesc.SampleDesc = sampleDesc;
5014 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
5015 resourceDesc.Flags = D3D12_RESOURCE_FLAGS(resourceFlags);
5016 D3D12_CLEAR_VALUE clearValue = {};
5017 clearValue.Format = dxgiFormat;
5018 if (isDepth) {
5019 clearValue.Format = toD3DDepthTextureDSVFormat(m_format);
5020 clearValue.DepthStencil.Depth = 1.0f;
5021 clearValue.DepthStencil.Stencil = 0;
5022 }
5023 ID3D12Resource *resource = nullptr;
5024 D3D12MA::Allocation *allocation = nullptr;
5025 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
5026 &resourceDesc,
5027 D3D12_RESOURCE_STATE_COMMON,
5028 needsOptimizedClearValueSpecified ? &clearValue : nullptr,
5029 &allocation,
5030 __uuidof(ID3D12Resource),
5031 reinterpret_cast<void **>(&resource));
5032 if (FAILED(hr)) {
5033 qWarning("Failed to create texture: '%s'"
5034 " Dim was %d Size was %ux%u Depth/ArraySize was %u MipLevels was %u Format was %d Sample count was %d",
5035 qPrintable(QSystemError::windowsComString(hr)),
5036 int(resourceDesc.Dimension),
5037 uint(resourceDesc.Width),
5038 uint(resourceDesc.Height),
5039 uint(resourceDesc.DepthOrArraySize),
5040 uint(resourceDesc.MipLevels),
5041 int(resourceDesc.Format),
5042 int(resourceDesc.SampleDesc.Count));
5043 return false;
5044 }
5045
5046 handle = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_COMMON, allocation);
5047
5048 if (!finishCreate())
5049 return false;
5050
5051 rhiD->registerResource(this);
5052 return true;
5053}
5054
5055bool QD3D12Texture::createFrom(QRhiTexture::NativeTexture src)
5056{
5057 if (!src.object)
5058 return false;
5059
5060 if (!prepareCreate())
5061 return false;
5062
5063 ID3D12Resource *resource = reinterpret_cast<ID3D12Resource *>(src.object);
5064 D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATES(src.layout);
5065
5066 QRHI_RES_RHI(QRhiD3D12);
5067 handle = QD3D12Resource::addNonOwningToPool(&rhiD->resourcePool, resource, state);
5068
5069 if (!finishCreate())
5070 return false;
5071
5072 rhiD->registerResource(this);
5073 return true;
5074}
5075
5076QRhiTexture::NativeTexture QD3D12Texture::nativeTexture()
5077{
5078 QRHI_RES_RHI(QRhiD3D12);
5079 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle))
5080 return { quint64(res->resource), int(res->state) };
5081
5082 return {};
5083}
5084
5085void QD3D12Texture::setNativeLayout(int layout)
5086{
5087 QRHI_RES_RHI(QRhiD3D12);
5088 if (QD3D12Resource *res = rhiD->resourcePool.lookupRef(handle))
5089 res->state = D3D12_RESOURCE_STATES(layout);
5090}
5091
5092QD3D12Sampler::QD3D12Sampler(QRhiImplementation *rhi, Filter magFilter, Filter minFilter, Filter mipmapMode,
5093 AddressMode u, AddressMode v, AddressMode w)
5094 : QRhiSampler(rhi, magFilter, minFilter, mipmapMode, u, v, w)
5095{
5096}
5097
5098QD3D12Sampler::~QD3D12Sampler()
5099{
5100 destroy();
5101}
5102
5103void QD3D12Sampler::destroy()
5104{
5105 shaderVisibleDescriptor = {};
5106
5107 QRHI_RES_RHI(QRhiD3D12);
5108 if (rhiD)
5109 rhiD->unregisterResource(this);
5110}
5111
5112static inline D3D12_FILTER toD3DFilter(QRhiSampler::Filter minFilter, QRhiSampler::Filter magFilter, QRhiSampler::Filter mipFilter)
5113{
5114 if (minFilter == QRhiSampler::Nearest) {
5115 if (magFilter == QRhiSampler::Nearest) {
5116 if (mipFilter == QRhiSampler::Linear)
5117 return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR;
5118 else
5119 return D3D12_FILTER_MIN_MAG_MIP_POINT;
5120 } else {
5121 if (mipFilter == QRhiSampler::Linear)
5122 return D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR;
5123 else
5124 return D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT;
5125 }
5126 } else {
5127 if (magFilter == QRhiSampler::Nearest) {
5128 if (mipFilter == QRhiSampler::Linear)
5129 return D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR;
5130 else
5131 return D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT;
5132 } else {
5133 if (mipFilter == QRhiSampler::Linear)
5134 return D3D12_FILTER_MIN_MAG_MIP_LINEAR;
5135 else
5136 return D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
5137 }
5138 }
5139 Q_UNREACHABLE_RETURN(D3D12_FILTER_MIN_MAG_MIP_LINEAR);
5140}
5141
5142static inline D3D12_TEXTURE_ADDRESS_MODE toD3DAddressMode(QRhiSampler::AddressMode m)
5143{
5144 switch (m) {
5145 case QRhiSampler::Repeat:
5146 return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
5147 case QRhiSampler::ClampToEdge:
5148 return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
5149 case QRhiSampler::Mirror:
5150 return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
5151 }
5152 Q_UNREACHABLE_RETURN(D3D12_TEXTURE_ADDRESS_MODE_CLAMP);
5153}
5154
5155static inline D3D12_COMPARISON_FUNC toD3DTextureComparisonFunc(QRhiSampler::CompareOp op)
5156{
5157 switch (op) {
5158 case QRhiSampler::Never:
5159 return D3D12_COMPARISON_FUNC_NEVER;
5160 case QRhiSampler::Less:
5161 return D3D12_COMPARISON_FUNC_LESS;
5162 case QRhiSampler::Equal:
5163 return D3D12_COMPARISON_FUNC_EQUAL;
5164 case QRhiSampler::LessOrEqual:
5165 return D3D12_COMPARISON_FUNC_LESS_EQUAL;
5166 case QRhiSampler::Greater:
5167 return D3D12_COMPARISON_FUNC_GREATER;
5168 case QRhiSampler::NotEqual:
5169 return D3D12_COMPARISON_FUNC_NOT_EQUAL;
5170 case QRhiSampler::GreaterOrEqual:
5171 return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
5172 case QRhiSampler::Always:
5173 return D3D12_COMPARISON_FUNC_ALWAYS;
5174 }
5175 Q_UNREACHABLE_RETURN(D3D12_COMPARISON_FUNC_NEVER);
5176}
5177
5178bool QD3D12Sampler::create()
5179{
5180 desc = {};
5181 desc.Filter = toD3DFilter(m_minFilter, m_magFilter, m_mipmapMode);
5182 if (m_compareOp != Never)
5183 desc.Filter = D3D12_FILTER(desc.Filter | 0x80);
5184 desc.AddressU = toD3DAddressMode(m_addressU);
5185 desc.AddressV = toD3DAddressMode(m_addressV);
5186 desc.AddressW = toD3DAddressMode(m_addressW);
5187 desc.MaxAnisotropy = 1.0f;
5188 desc.ComparisonFunc = toD3DTextureComparisonFunc(m_compareOp);
5189 desc.MaxLOD = m_mipmapMode == None ? 0.0f : 10000.0f;
5190
5191 QRHI_RES_RHI(QRhiD3D12);
5192 rhiD->registerResource(this, false);
5193 return true;
5194}
5195
5196QD3D12Descriptor QD3D12Sampler::lookupOrCreateShaderVisibleDescriptor()
5197{
5198 if (!shaderVisibleDescriptor.isValid()) {
5199 QRHI_RES_RHI(QRhiD3D12);
5200 shaderVisibleDescriptor = rhiD->samplerMgr.getShaderVisibleDescriptor(desc);
5201 }
5202 return shaderVisibleDescriptor;
5203}
5204
5205QD3D12ShadingRateMap::QD3D12ShadingRateMap(QRhiImplementation *rhi)
5206 : QRhiShadingRateMap(rhi)
5207{
5208}
5209
5210QD3D12ShadingRateMap::~QD3D12ShadingRateMap()
5211{
5212 destroy();
5213}
5214
5215void QD3D12ShadingRateMap::destroy()
5216{
5217 if (handle.isNull())
5218 return;
5219
5220 handle = {};
5221}
5222
5223bool QD3D12ShadingRateMap::createFrom(QRhiTexture *src)
5224{
5225 if (!handle.isNull())
5226 destroy();
5227
5228 handle = QRHI_RES(QD3D12Texture, src)->handle;
5229
5230 return true;
5231}
5232
5233QD3D12TextureRenderTarget::QD3D12TextureRenderTarget(QRhiImplementation *rhi,
5234 const QRhiTextureRenderTargetDescription &desc,
5235 Flags flags)
5236 : QRhiTextureRenderTarget(rhi, desc, flags),
5237 d(rhi)
5238{
5239}
5240
5241QD3D12TextureRenderTarget::~QD3D12TextureRenderTarget()
5242{
5243 destroy();
5244}
5245
5246void QD3D12TextureRenderTarget::destroy()
5247{
5248 if (!rtv[0].isValid() && !dsv.isValid())
5249 return;
5250
5251 QRHI_RES_RHI(QRhiD3D12);
5252 if (dsv.isValid()) {
5253 if (ownsDsv && rhiD)
5254 rhiD->releaseQueue.deferredReleaseViews(&rhiD->dsvPool, dsv, 1);
5255 dsv = {};
5256 }
5257
5258 for (int i = 0; i < QD3D12RenderTargetData::MAX_COLOR_ATTACHMENTS; ++i) {
5259 if (rtv[i].isValid()) {
5260 if (ownsRtv[i] && rhiD)
5261 rhiD->releaseQueue.deferredReleaseViews(&rhiD->rtvPool, rtv[i], 1);
5262 rtv[i] = {};
5263 }
5264 }
5265
5266 if (rhiD)
5267 rhiD->unregisterResource(this);
5268}
5269
5270QRhiRenderPassDescriptor *QD3D12TextureRenderTarget::newCompatibleRenderPassDescriptor()
5271{
5272 // not yet built so cannot rely on data computed in create()
5273
5274 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
5275
5276 rpD->colorAttachmentCount = 0;
5277 for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it) {
5278 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, it->texture());
5279 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, it->renderBuffer());
5280 if (texD)
5281 rpD->colorFormat[rpD->colorAttachmentCount] = texD->rtFormat;
5282 else if (rbD)
5283 rpD->colorFormat[rpD->colorAttachmentCount] = rbD->dxgiFormat;
5284 rpD->colorAttachmentCount += 1;
5285 }
5286
5287 rpD->hasDepthStencil = false;
5288 if (m_desc.depthStencilBuffer()) {
5289 rpD->hasDepthStencil = true;
5290 rpD->dsFormat = QD3D12RenderBuffer::DS_FORMAT;
5291 } else if (m_desc.depthTexture()) {
5292 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, m_desc.depthTexture());
5293 rpD->hasDepthStencil = true;
5294 rpD->dsFormat = toD3DDepthTextureDSVFormat(depthTexD->format()); // cannot be a typeless format
5295 }
5296
5297 rpD->hasShadingRateMap = m_desc.shadingRateMap() != nullptr;
5298
5299 rpD->updateSerializedFormat();
5300
5301 QRHI_RES_RHI(QRhiD3D12);
5302 rhiD->registerResource(rpD);
5303 return rpD;
5304}
5305
5306bool QD3D12TextureRenderTarget::create()
5307{
5308 if (rtv[0].isValid() || dsv.isValid())
5309 destroy();
5310
5311 QRHI_RES_RHI(QRhiD3D12);
5312 Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture());
5313 Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture());
5314 const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
5315 d.colorAttCount = 0;
5316 int attIndex = 0;
5317
5318 for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it, ++attIndex) {
5319 d.colorAttCount += 1;
5320 const QRhiColorAttachment &colorAtt(*it);
5321 QRhiTexture *texture = colorAtt.texture();
5322 QRhiRenderBuffer *rb = colorAtt.renderBuffer();
5323 Q_ASSERT(texture || rb);
5324 if (texture) {
5325 QD3D12Texture *texD = QRHI_RES(QD3D12Texture, texture);
5326 QD3D12Resource *res = rhiD->resourcePool.lookupRef(texD->handle);
5327 if (!res) {
5328 qWarning("Could not look up texture handle for render target");
5329 return false;
5330 }
5331 const bool isMultiView = it->multiViewCount() >= 2;
5332 UINT layerCount = isMultiView ? UINT(it->multiViewCount()) : 1;
5333 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
5334 rtvDesc.Format = texD->rtFormat;
5335 if (texD->flags().testFlag(QRhiTexture::CubeMap)) {
5336 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
5337 rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level());
5338 rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer());
5339 rtvDesc.Texture2DArray.ArraySize = layerCount;
5340 } else if (texD->flags().testFlag(QRhiTexture::OneDimensional)) {
5341 if (texD->flags().testFlag(QRhiTexture::TextureArray)) {
5342 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY;
5343 rtvDesc.Texture1DArray.MipSlice = UINT(colorAtt.level());
5344 rtvDesc.Texture1DArray.FirstArraySlice = UINT(colorAtt.layer());
5345 rtvDesc.Texture1DArray.ArraySize = layerCount;
5346 } else {
5347 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D;
5348 rtvDesc.Texture1D.MipSlice = UINT(colorAtt.level());
5349 }
5350 } else if (texD->flags().testFlag(QRhiTexture::TextureArray)) {
5351 if (texD->sampleDesc.Count > 1) {
5352 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
5353 rtvDesc.Texture2DMSArray.FirstArraySlice = UINT(colorAtt.layer());
5354 rtvDesc.Texture2DMSArray.ArraySize = layerCount;
5355 } else {
5356 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
5357 rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level());
5358 rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer());
5359 rtvDesc.Texture2DArray.ArraySize = layerCount;
5360 }
5361 } else if (texD->flags().testFlag(QRhiTexture::ThreeDimensional)) {
5362 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;
5363 rtvDesc.Texture3D.MipSlice = UINT(colorAtt.level());
5364 rtvDesc.Texture3D.FirstWSlice = UINT(colorAtt.layer());
5365 rtvDesc.Texture3D.WSize = layerCount;
5366 } else {
5367 if (texD->sampleDesc.Count > 1) {
5368 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS;
5369 } else {
5370 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
5371 rtvDesc.Texture2D.MipSlice = UINT(colorAtt.level());
5372 }
5373 }
5374 rtv[attIndex] = rhiD->rtvPool.allocate(1);
5375 if (!rtv[attIndex].isValid()) {
5376 qWarning("Failed to allocate RTV for texture render target");
5377 return false;
5378 }
5379 rhiD->dev->CreateRenderTargetView(res->resource, &rtvDesc, rtv[attIndex].cpuHandle);
5380 ownsRtv[attIndex] = true;
5381 if (attIndex == 0) {
5382 d.pixelSize = rhiD->q->sizeForMipLevel(colorAtt.level(), texD->pixelSize());
5383 d.sampleCount = int(texD->sampleDesc.Count);
5384 }
5385 } else if (rb) {
5386 QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, rb);
5387 ownsRtv[attIndex] = false;
5388 rtv[attIndex] = rbD->rtv;
5389 if (attIndex == 0) {
5390 d.pixelSize = rbD->pixelSize();
5391 d.sampleCount = int(rbD->sampleDesc.Count);
5392 }
5393 }
5394 }
5395
5396 d.dpr = 1;
5397
5398 if (hasDepthStencil) {
5399 if (m_desc.depthTexture()) {
5400 ownsDsv = true;
5401 QD3D12Texture *depthTexD = QRHI_RES(QD3D12Texture, m_desc.depthTexture());
5402 QD3D12Resource *res = rhiD->resourcePool.lookupRef(depthTexD->handle);
5403 if (!res) {
5404 qWarning("Could not look up depth texture handle");
5405 return false;
5406 }
5407 D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {};
5408 dsvDesc.Format = depthTexD->rtFormat;
5409 dsvDesc.ViewDimension = depthTexD->sampleDesc.Count > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS
5410 : D3D12_DSV_DIMENSION_TEXTURE2D;
5411 if (depthTexD->flags().testFlag(QRhiTexture::TextureArray)) {
5412 if (depthTexD->sampleDesc.Count > 1) {
5413 dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY;
5414 if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) {
5415 dsvDesc.Texture2DMSArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart());
5416 dsvDesc.Texture2DMSArray.ArraySize = UINT(depthTexD->arrayRangeLength());
5417 } else {
5418 dsvDesc.Texture2DMSArray.FirstArraySlice = 0;
5419 dsvDesc.Texture2DMSArray.ArraySize = UINT(qMax(0, depthTexD->arraySize()));
5420 }
5421 } else {
5422 dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
5423 if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) {
5424 dsvDesc.Texture2DArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart());
5425 dsvDesc.Texture2DArray.ArraySize = UINT(depthTexD->arrayRangeLength());
5426 } else {
5427 dsvDesc.Texture2DArray.FirstArraySlice = 0;
5428 dsvDesc.Texture2DArray.ArraySize = UINT(qMax(0, depthTexD->arraySize()));
5429 }
5430 }
5431 }
5432 dsv = rhiD->dsvPool.allocate(1);
5433 if (!dsv.isValid()) {
5434 qWarning("Failed to allocate DSV for texture render target");
5435 return false;
5436 }
5437 rhiD->dev->CreateDepthStencilView(res->resource, &dsvDesc, dsv.cpuHandle);
5438 if (d.colorAttCount == 0) {
5439 d.pixelSize = depthTexD->pixelSize();
5440 d.sampleCount = int(depthTexD->sampleDesc.Count);
5441 }
5442 } else {
5443 ownsDsv = false;
5444 QD3D12RenderBuffer *depthRbD = QRHI_RES(QD3D12RenderBuffer, m_desc.depthStencilBuffer());
5445 dsv = depthRbD->dsv;
5446 if (d.colorAttCount == 0) {
5447 d.pixelSize = m_desc.depthStencilBuffer()->pixelSize();
5448 d.sampleCount = int(depthRbD->sampleDesc.Count);
5449 }
5450 }
5451 d.dsAttCount = 1;
5452 } else {
5453 d.dsAttCount = 0;
5454 }
5455
5456 D3D12_CPU_DESCRIPTOR_HANDLE nullDescHandle = { 0 };
5457 for (int i = 0; i < QD3D12RenderTargetData::MAX_COLOR_ATTACHMENTS; ++i)
5458 d.rtv[i] = i < d.colorAttCount ? rtv[i].cpuHandle : nullDescHandle;
5459 d.dsv = dsv.cpuHandle;
5460 d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
5461
5462 QRhiRenderTargetAttachmentTracker::updateResIdList<QD3D12Texture, QD3D12RenderBuffer>(m_desc, &d.currentResIdList);
5463
5464 rhiD->registerResource(this);
5465 return true;
5466}
5467
5468QSize QD3D12TextureRenderTarget::pixelSize() const
5469{
5470 if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QD3D12Texture, QD3D12RenderBuffer>(m_desc, d.currentResIdList))
5471 const_cast<QD3D12TextureRenderTarget *>(this)->create();
5472
5473 return d.pixelSize;
5474}
5475
5476float QD3D12TextureRenderTarget::devicePixelRatio() const
5477{
5478 return d.dpr;
5479}
5480
5481int QD3D12TextureRenderTarget::sampleCount() const
5482{
5483 return d.sampleCount;
5484}
5485
5486QD3D12ShaderResourceBindings::QD3D12ShaderResourceBindings(QRhiImplementation *rhi)
5487 : QRhiShaderResourceBindings(rhi)
5488{
5489}
5490
5491QD3D12ShaderResourceBindings::~QD3D12ShaderResourceBindings()
5492{
5493 destroy();
5494}
5495
5496void QD3D12ShaderResourceBindings::destroy()
5497{
5498 QRHI_RES_RHI(QRhiD3D12);
5499 if (rhiD)
5500 rhiD->unregisterResource(this);
5501}
5502
5503bool QD3D12ShaderResourceBindings::create()
5504{
5505 QRHI_RES_RHI(QRhiD3D12);
5506 if (!rhiD->sanityCheckShaderResourceBindings(this))
5507 return false;
5508
5509 rhiD->updateLayoutDesc(this);
5510
5511 hasDynamicOffset = false;
5512 for (const QRhiShaderResourceBinding &b : std::as_const(m_bindings)) {
5513 const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b);
5514 if (bd->type == QRhiShaderResourceBinding::UniformBuffer && bd->u.ubuf.hasDynamicOffset) {
5515 hasDynamicOffset = true;
5516 break;
5517 }
5518 }
5519
5520 // The root signature is not part of the srb. Unintuitive, but the shader
5521 // translation pipeline ties our hands: as long as the per-shader (so per
5522 // stage!) nativeResourceBindingMap exist, meaning f.ex. that a SPIR-V
5523 // combined image sampler binding X passed in here may map to the tY and sY
5524 // HLSL registers, where Y is known only once the mapping table from the
5525 // shader is looked up. Creating a root parameters at this stage is
5526 // therefore impossible.
5527
5528 generation += 1;
5529 rhiD->registerResource(this, false);
5530 return true;
5531}
5532
5533void QD3D12ShaderResourceBindings::updateResources(UpdateFlags flags)
5534{
5535 Q_UNUSED(flags);
5536 generation += 1;
5537}
5538
// Accessing the QRhiBuffer/Texture/Sampler resources must be avoided in the
// callbacks below. That would only be possible if the srb had those resources
// specified, which is not required when the srb and the pipeline are created,
// and createRootSignature() is called from the pipeline's create().
5543
5544void QD3D12ShaderResourceBindings::visitUniformBuffer(QD3D12Stage s,
5545 const QRhiShaderResourceBinding::Data::UniformBufferData &,
5546 int shaderRegister,
5547 int)
5548{
5549 D3D12_ROOT_PARAMETER1 rootParam = {};
5550 rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
5551 rootParam.ShaderVisibility = qd3d12_stageToVisibility(s);
5552 rootParam.Descriptor.ShaderRegister = shaderRegister;
5553 rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC;
5554 visitorData.cbParams[s].append(rootParam);
5555}
5556
5557void QD3D12ShaderResourceBindings::visitTexture(QD3D12Stage s,
5558 const QRhiShaderResourceBinding::TextureAndSampler &,
5559 int shaderRegister)
5560{
5561 D3D12_DESCRIPTOR_RANGE1 range = {};
5562 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
5563 range.NumDescriptors = 1;
5564 range.BaseShaderRegister = shaderRegister;
5565 range.OffsetInDescriptorsFromTableStart = visitorData.currentSrvRangeOffset[s];
5566 visitorData.currentSrvRangeOffset[s] += 1;
5567 visitorData.srvRanges[s].append(range);
5568 if (visitorData.srvRanges[s].count() == 1) {
5569 visitorData.srvTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5570 visitorData.srvTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5571 }
5572}
5573
5574void QD3D12ShaderResourceBindings::visitSampler(QD3D12Stage s,
5575 const QRhiShaderResourceBinding::TextureAndSampler &,
5576 int shaderRegister)
5577{
5578 // Unlike SRVs and UAVs, samplers are handled so that each sampler becomes
5579 // a root parameter with its own descriptor table.
5580
5581 int &rangeStoreIdx(visitorData.samplerRangeHeads[s]);
5582 if (rangeStoreIdx == 16) {
5583 qWarning("Sampler count in QD3D12Stage %d exceeds the limit of 16, this is disallowed by QRhi", s);
5584 return;
5585 }
5586 D3D12_DESCRIPTOR_RANGE1 range = {};
5587 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
5588 range.NumDescriptors = 1;
5589 range.BaseShaderRegister = shaderRegister;
5590 visitorData.samplerRanges[s][rangeStoreIdx] = range;
5591 D3D12_ROOT_PARAMETER1 param = {};
5592 param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5593 param.ShaderVisibility = qd3d12_stageToVisibility(s);
5594 param.DescriptorTable.NumDescriptorRanges = 1;
5595 param.DescriptorTable.pDescriptorRanges = &visitorData.samplerRanges[s][rangeStoreIdx];
5596 rangeStoreIdx += 1;
5597 visitorData.samplerTables[s].append(param);
5598}
5599
5600void QD3D12ShaderResourceBindings::visitStorageBuffer(QD3D12Stage s,
5601 const QRhiShaderResourceBinding::Data::StorageBufferData &,
5602 QD3D12ShaderResourceVisitor::StorageOp,
5603 int shaderRegister)
5604{
5605 D3D12_DESCRIPTOR_RANGE1 range = {};
5606 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
5607 range.NumDescriptors = 1;
5608 range.BaseShaderRegister = shaderRegister;
5609 range.OffsetInDescriptorsFromTableStart = visitorData.currentUavRangeOffset[s];
5610 visitorData.currentUavRangeOffset[s] += 1;
5611 visitorData.uavRanges[s].append(range);
5612 if (visitorData.uavRanges[s].count() == 1) {
5613 visitorData.uavTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5614 visitorData.uavTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5615 }
5616}
5617
5618void QD3D12ShaderResourceBindings::visitStorageImage(QD3D12Stage s,
5619 const QRhiShaderResourceBinding::Data::StorageImageData &,
5620 QD3D12ShaderResourceVisitor::StorageOp,
5621 int shaderRegister)
5622{
5623 D3D12_DESCRIPTOR_RANGE1 range = {};
5624 range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
5625 range.NumDescriptors = 1;
5626 range.BaseShaderRegister = shaderRegister;
5627 range.OffsetInDescriptorsFromTableStart = visitorData.currentUavRangeOffset[s];
5628 visitorData.currentUavRangeOffset[s] += 1;
5629 visitorData.uavRanges[s].append(range);
5630 if (visitorData.uavRanges[s].count() == 1) {
5631 visitorData.uavTables[s].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
5632 visitorData.uavTables[s].ShaderVisibility = qd3d12_stageToVisibility(s);
5633 }
5634}
5635
5636QD3D12ObjectHandle QD3D12ShaderResourceBindings::createRootSignature(const QD3D12ShaderStageData *stageData,
5637 int stageCount)
5638{
5639 QRHI_RES_RHI(QRhiD3D12);
5640
5641 // It's not just that the root signature has to be tied to the pipeline
5642 // (cannot just freely create it like e.g. with Vulkan where one just
5643 // creates a descriptor layout 1:1 with the QRhiShaderResourceBindings'
5644 // data), due to not knowing the shader-specific resource binding mapping
5645 // tables at the point of srb creation, but each shader stage may have a
5646 // different mapping table. (ugh!)
5647 //
5648 // Hence we set up everything per-stage, even if it means the root
5649 // signature gets unnecessarily big. (note that the magic is in the
5650 // ShaderVisibility: even though the register range is the same in the
5651 // descriptor tables, the visibility is different)
5652
5653 QD3D12ShaderResourceVisitor visitor(this, stageData, stageCount);
5654
5655 visitorData = {};
5656
5657 using namespace std::placeholders;
5658 visitor.uniformBuffer = std::bind(&QD3D12ShaderResourceBindings::visitUniformBuffer, this, _1, _2, _3, _4);
5659 visitor.texture = std::bind(&QD3D12ShaderResourceBindings::visitTexture, this, _1, _2, _3);
5660 visitor.sampler = std::bind(&QD3D12ShaderResourceBindings::visitSampler, this, _1, _2, _3);
5661 visitor.storageBuffer = std::bind(&QD3D12ShaderResourceBindings::visitStorageBuffer, this, _1, _2, _3, _4);
5662 visitor.storageImage = std::bind(&QD3D12ShaderResourceBindings::visitStorageImage, this, _1, _2, _3, _4);
5663
5664 visitor.visit();
5665
5666 // The maximum size of a root signature is 256 bytes, where a descriptor
5667 // table is 4, a root descriptor (e.g. CBV) is 8. We have 5 stages at most
5668 // (or 1 with compute) and a separate descriptor table for SRVs (->
5669 // textures) and UAVs (-> storage buffers and images) per stage, plus each
5670 // uniform buffer counts as a CBV in the stages it is visible.
5671 //
5672 // Due to the limited maximum size of a shader-visible sampler heap (2048)
5673 // and the potential costly switching of descriptor heaps, each sampler is
5674 // declared as a separate root parameter / descriptor table (meaning that
5675 // two samplers in the same stage are two parameters and two tables, not
5676 // just one). QRhi documents a hard limit of 16 on texture/sampler bindings
5677 // in a shader (matching D3D11), so we can hopefully get away with this.
5678 //
5679 // This means that e.g. a vertex+fragment shader with a uniform buffer
5680 // visible in both and one texture+sampler in the fragment shader would
5681 // consume 2*8 + 4 + 4 = 24 bytes. This also implies that clients
5682 // specifying the minimal stage bit mask for each entry in
5683 // QRhiShaderResourceBindings are ideal for this backend since it helps
5684 // reducing the chance of hitting the size limit.
5685
5686 QVarLengthArray<D3D12_ROOT_PARAMETER1, 4> rootParams;
5687 for (int s = 0; s < 6; ++s) {
5688 if (!visitorData.cbParams[s].isEmpty())
5689 rootParams.append(visitorData.cbParams[s].constData(), visitorData.cbParams[s].count());
5690 }
5691 for (int s = 0; s < 6; ++s) {
5692 if (!visitorData.srvRanges[s].isEmpty()) {
5693 visitorData.srvTables[s].DescriptorTable.NumDescriptorRanges = visitorData.srvRanges[s].count();
5694 visitorData.srvTables[s].DescriptorTable.pDescriptorRanges = visitorData.srvRanges[s].constData();
5695 rootParams.append(visitorData.srvTables[s]);
5696 }
5697 }
5698 for (int s = 0; s < 6; ++s) {
5699 if (!visitorData.samplerTables[s].isEmpty())
5700 rootParams.append(visitorData.samplerTables[s].constData(), visitorData.samplerTables[s].count());
5701 }
5702 for (int s = 0; s < 6; ++s) {
5703 if (!visitorData.uavRanges[s].isEmpty()) {
5704 visitorData.uavTables[s].DescriptorTable.NumDescriptorRanges = visitorData.uavRanges[s].count();
5705 visitorData.uavTables[s].DescriptorTable.pDescriptorRanges = visitorData.uavRanges[s].constData();
5706 rootParams.append(visitorData.uavTables[s]);
5707 }
5708 }
5709
5710 D3D12_VERSIONED_ROOT_SIGNATURE_DESC rsDesc = {};
5711 rsDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
5712 if (!rootParams.isEmpty()) {
5713 rsDesc.Desc_1_1.NumParameters = rootParams.count();
5714 rsDesc.Desc_1_1.pParameters = rootParams.constData();
5715 }
5716
5717 UINT rsFlags = 0;
5718 for (int stageIdx = 0; stageIdx < stageCount; ++stageIdx) {
5719 if (stageData[stageIdx].valid && stageData[stageIdx].stage == VS)
5720 rsFlags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
5721 }
5722 rsDesc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAGS(rsFlags);
5723
5724 ID3DBlob *signature = nullptr;
5725 HRESULT hr = D3D12SerializeVersionedRootSignature(&rsDesc, &signature, nullptr);
5726 if (FAILED(hr)) {
5727 qWarning("Failed to serialize root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
5728 return {};
5729 }
5730 ID3D12RootSignature *rootSig = nullptr;
5731 hr = rhiD->dev->CreateRootSignature(0,
5732 signature->GetBufferPointer(),
5733 signature->GetBufferSize(),
5734 __uuidof(ID3D12RootSignature),
5735 reinterpret_cast<void **>(&rootSig));
5736 signature->Release();
5737 if (FAILED(hr)) {
5738 qWarning("Failed to create root signature: %s", qPrintable(QSystemError::windowsComString(hr)));
5739 return {};
5740 }
5741
5742 return QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig);
5743}
5744
5745// For shader model < 6.0 we do the same as the D3D11 backend: use the old
5746// compiler (D3DCompile) to generate DXBC, just as qsb does (when -c is passed)
5747// by invoking fxc, not dxc. For SM >= 6.0 we have to use the new compiler and
5748// work with DXIL. And that involves IDxcCompiler and needs the presence of
5749// dxcompiler.dll and dxil.dll at runtime. Plus there's a chance we have
5750// ancient SDK headers when not using MSVC. So this is heavily optional,
5751// meaning support for dxc can be disabled both at build time (no dxcapi.h) and
5752// at run time (no DLLs).
5753
// Writes an HLSL target profile of the form "<stage>_<major>_<minor>" into
// target, e.g. stage "vs" and version 67 give "vs_6_7". version encodes the
// shader model as major * 10 + minor; the result is null-terminated.
static inline void makeHlslTargetString(char target[7], const char stage[3], int version)
{
    char *out = target;
    *out++ = stage[0];
    *out++ = stage[1];
    *out++ = '_';
    *out++ = char('0' + version / 10); // shader model major
    *out++ = '_';
    *out++ = char('0' + version % 10); // shader model minor
    *out = '\0';
}
5766
// Bit flags for the HLSL compile helpers; they are passed as a plain int and
// tested with a bitwise AND.
enum class HlslCompileFlag {
    WithDebugInfo = 0x01 // request debug information from the compiler
};
5771
5772static QByteArray legacyCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error)
5773{
5774 static const pD3DCompile d3dCompile = QRhiD3D::resolveD3DCompile();
5775 if (!d3dCompile) {
5776 qWarning("Unable to resolve function D3DCompile()");
5777 return QByteArray();
5778 }
5779
5780 ID3DBlob *bytecode = nullptr;
5781 ID3DBlob *errors = nullptr;
5782 UINT d3dCompileFlags = 0;
5783 if (flags & int(HlslCompileFlag::WithDebugInfo))
5784 d3dCompileFlags |= D3DCOMPILE_DEBUG;
5785
5786 HRESULT hr = d3dCompile(hlslSource.shader().constData(), SIZE_T(hlslSource.shader().size()),
5787 nullptr, nullptr, nullptr,
5788 hlslSource.entryPoint().constData(), target, d3dCompileFlags, 0, &bytecode, &errors);
5789 if (FAILED(hr) || !bytecode) {
5790 qWarning("HLSL shader compilation failed: 0x%x", uint(hr));
5791 if (errors) {
5792 *error = QString::fromUtf8(static_cast<const char *>(errors->GetBufferPointer()),
5793 int(errors->GetBufferSize()));
5794 errors->Release();
5795 }
5796 return QByteArray();
5797 }
5798
5799 QByteArray result;
5800 result.resize(int(bytecode->GetBufferSize()));
5801 memcpy(result.data(), bytecode->GetBufferPointer(), size_t(result.size()));
5802 bytecode->Release();
5803 return result;
5804}
5805
5806#ifdef QRHI_D3D12_HAS_DXC
5807
5808#ifndef DXC_CP_UTF8
5809#define DXC_CP_UTF8 65001
5810#endif
5811
5812#ifndef DXC_ARG_DEBUG
5813#define DXC_ARG_DEBUG L"-Zi"
5814#endif
5815
5816static QByteArray dxcCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error)
5817{
5818 static std::pair<IDxcCompiler *, IDxcLibrary *> dxc = QRhiD3D::createDxcCompiler();
5819 IDxcCompiler *compiler = dxc.first;
5820 if (!compiler) {
5821 qWarning("Unable to instantiate IDxcCompiler. Likely no dxcompiler.dll and dxil.dll present. "
5822 "Use windeployqt or try https://github.com/microsoft/DirectXShaderCompiler/releases");
5823 return QByteArray();
5824 }
5825 IDxcLibrary *library = dxc.second;
5826 if (!library)
5827 return QByteArray();
5828
5829 IDxcBlobEncoding *sourceBlob = nullptr;
5830 HRESULT hr = library->CreateBlobWithEncodingOnHeapCopy(hlslSource.shader().constData(),
5831 UINT32(hlslSource.shader().size()),
5832 DXC_CP_UTF8,
5833 &sourceBlob);
5834 if (FAILED(hr)) {
5835 qWarning("Failed to create source blob for dxc: 0x%x (%s)",
5836 uint(hr),
5837 qPrintable(QSystemError::windowsComString(hr)));
5838 return QByteArray();
5839 }
5840
5841 const QString entryPointStr = QString::fromLatin1(hlslSource.entryPoint());
5842 const QString targetStr = QString::fromLatin1(target);
5843
5844 QVarLengthArray<LPCWSTR, 4> argPtrs;
5845 QString debugArg;
5846 if (flags & int(HlslCompileFlag::WithDebugInfo)) {
5847 debugArg = QString::fromUtf16(reinterpret_cast<const char16_t *>(DXC_ARG_DEBUG));
5848 argPtrs.append(reinterpret_cast<LPCWSTR>(debugArg.utf16()));
5849 }
5850
5851 IDxcOperationResult *result = nullptr;
5852 hr = compiler->Compile(sourceBlob,
5853 nullptr,
5854 reinterpret_cast<LPCWSTR>(entryPointStr.utf16()),
5855 reinterpret_cast<LPCWSTR>(targetStr.utf16()),
5856 argPtrs.data(), argPtrs.count(),
5857 nullptr, 0,
5858 nullptr,
5859 &result);
5860 sourceBlob->Release();
5861 if (SUCCEEDED(hr))
5862 result->GetStatus(&hr);
5863 if (FAILED(hr)) {
5864 qWarning("HLSL shader compilation failed: 0x%x (%s)",
5865 uint(hr),
5866 qPrintable(QSystemError::windowsComString(hr)));
5867 if (result) {
5868 IDxcBlobEncoding *errorsBlob = nullptr;
5869 if (SUCCEEDED(result->GetErrorBuffer(&errorsBlob))) {
5870 if (errorsBlob) {
5871 *error = QString::fromUtf8(static_cast<const char *>(errorsBlob->GetBufferPointer()),
5872 int(errorsBlob->GetBufferSize()));
5873 errorsBlob->Release();
5874 }
5875 }
5876 }
5877 return QByteArray();
5878 }
5879
5880 IDxcBlob *bytecode = nullptr;
5881 if FAILED(result->GetResult(&bytecode)) {
5882 qWarning("No result from IDxcCompiler: 0x%x (%s)",
5883 uint(hr),
5884 qPrintable(QSystemError::windowsComString(hr)));
5885 return QByteArray();
5886 }
5887
5888 QByteArray ba;
5889 ba.resize(int(bytecode->GetBufferSize()));
5890 memcpy(ba.data(), bytecode->GetBufferPointer(), size_t(ba.size()));
5891 bytecode->Release();
5892 return ba;
5893}
5894
5895#endif // QRHI_D3D12_HAS_DXC
5896
5897static QByteArray compileHlslShaderSource(const QShader &shader,
5898 QShader::Variant shaderVariant,
5899 int flags,
5900 QString *error,
5901 QShaderKey *usedShaderKey)
5902{
5903 // look for SM 6.7, 6.6, .., 5.0
5904 const int shaderModelMax = 67;
5905 for (int sm = shaderModelMax; sm >= 50; --sm) {
5906 for (QShader::Source type : { QShader::DxilShader, QShader::DxbcShader }) {
5907 QShaderKey key = { type, sm, shaderVariant };
5908 QShaderCode intermediateBytecodeShader = shader.shader(key);
5909 if (!intermediateBytecodeShader.shader().isEmpty()) {
5910 if (usedShaderKey)
5911 *usedShaderKey = key;
5912 return intermediateBytecodeShader.shader();
5913 }
5914 }
5915 }
5916
5917 QShaderCode hlslSource;
5918 QShaderKey key;
5919 for (int sm = shaderModelMax; sm >= 50; --sm) {
5920 key = { QShader::HlslShader, sm, shaderVariant };
5921 hlslSource = shader.shader(key);
5922 if (!hlslSource.shader().isEmpty())
5923 break;
5924 }
5925
5926 if (hlslSource.shader().isEmpty()) {
5927 qWarning() << "No HLSL (shader model 6.7..5.0) code found in baked shader" << shader;
5928 return QByteArray();
5929 }
5930
5931 if (usedShaderKey)
5932 *usedShaderKey = key;
5933
5934 char target[7];
5935 switch (shader.stage()) {
5936 case QShader::VertexStage:
5937 makeHlslTargetString(target, "vs", key.sourceVersion().version());
5938 break;
5939 case QShader::TessellationControlStage:
5940 makeHlslTargetString(target, "hs", key.sourceVersion().version());
5941 break;
5942 case QShader::TessellationEvaluationStage:
5943 makeHlslTargetString(target, "ds", key.sourceVersion().version());
5944 break;
5945 case QShader::GeometryStage:
5946 makeHlslTargetString(target, "gs", key.sourceVersion().version());
5947 break;
5948 case QShader::FragmentStage:
5949 makeHlslTargetString(target, "ps", key.sourceVersion().version());
5950 break;
5951 case QShader::ComputeStage:
5952 makeHlslTargetString(target, "cs", key.sourceVersion().version());
5953 break;
5954 }
5955
5956 if (key.sourceVersion().version() >= 60) {
5957#ifdef QRHI_D3D12_HAS_DXC
5958 return dxcCompile(hlslSource, target, flags, error);
5959#else
5960 qWarning("Attempted to runtime-compile HLSL source code for shader model >= 6.0 "
5961 "but the Qt build has no support for DXC. "
5962 "Rebuild Qt with a recent Windows SDK or switch to an MSVC build.");
5963#endif
5964 }
5965
5966 return legacyCompile(hlslSource, target, flags, error);
5967}
5968
5969static inline UINT8 toD3DColorWriteMask(QRhiGraphicsPipeline::ColorMask c)
5970{
5971 UINT8 f = 0;
5972 if (c.testFlag(QRhiGraphicsPipeline::R))
5973 f |= D3D12_COLOR_WRITE_ENABLE_RED;
5974 if (c.testFlag(QRhiGraphicsPipeline::G))
5975 f |= D3D12_COLOR_WRITE_ENABLE_GREEN;
5976 if (c.testFlag(QRhiGraphicsPipeline::B))
5977 f |= D3D12_COLOR_WRITE_ENABLE_BLUE;
5978 if (c.testFlag(QRhiGraphicsPipeline::A))
5979 f |= D3D12_COLOR_WRITE_ENABLE_ALPHA;
5980 return f;
5981}
5982
5983static inline D3D12_BLEND toD3DBlendFactor(QRhiGraphicsPipeline::BlendFactor f, bool rgb)
5984{
5985 // SrcBlendAlpha and DstBlendAlpha do not accept *_COLOR. With other APIs
5986 // this is handled internally (so that e.g. VK_BLEND_FACTOR_SRC_COLOR is
5987 // accepted and is in effect equivalent to VK_BLEND_FACTOR_SRC_ALPHA when
5988 // set as an alpha src/dest factor), but for D3D we have to take care of it
5989 // ourselves. Hence the rgb argument.
5990
5991 switch (f) {
5992 case QRhiGraphicsPipeline::Zero:
5993 return D3D12_BLEND_ZERO;
5994 case QRhiGraphicsPipeline::One:
5995 return D3D12_BLEND_ONE;
5996 case QRhiGraphicsPipeline::SrcColor:
5997 return rgb ? D3D12_BLEND_SRC_COLOR : D3D12_BLEND_SRC_ALPHA;
5998 case QRhiGraphicsPipeline::OneMinusSrcColor:
5999 return rgb ? D3D12_BLEND_INV_SRC_COLOR : D3D12_BLEND_INV_SRC_ALPHA;
6000 case QRhiGraphicsPipeline::DstColor:
6001 return rgb ? D3D12_BLEND_DEST_COLOR : D3D12_BLEND_DEST_ALPHA;
6002 case QRhiGraphicsPipeline::OneMinusDstColor:
6003 return rgb ? D3D12_BLEND_INV_DEST_COLOR : D3D12_BLEND_INV_DEST_ALPHA;
6004 case QRhiGraphicsPipeline::SrcAlpha:
6005 return D3D12_BLEND_SRC_ALPHA;
6006 case QRhiGraphicsPipeline::OneMinusSrcAlpha:
6007 return D3D12_BLEND_INV_SRC_ALPHA;
6008 case QRhiGraphicsPipeline::DstAlpha:
6009 return D3D12_BLEND_DEST_ALPHA;
6010 case QRhiGraphicsPipeline::OneMinusDstAlpha:
6011 return D3D12_BLEND_INV_DEST_ALPHA;
6012 case QRhiGraphicsPipeline::ConstantColor:
6013 case QRhiGraphicsPipeline::ConstantAlpha:
6014 return D3D12_BLEND_BLEND_FACTOR;
6015 case QRhiGraphicsPipeline::OneMinusConstantColor:
6016 case QRhiGraphicsPipeline::OneMinusConstantAlpha:
6017 return D3D12_BLEND_INV_BLEND_FACTOR;
6018 case QRhiGraphicsPipeline::SrcAlphaSaturate:
6019 return D3D12_BLEND_SRC_ALPHA_SAT;
6020 case QRhiGraphicsPipeline::Src1Color:
6021 return rgb ? D3D12_BLEND_SRC1_COLOR : D3D12_BLEND_SRC1_ALPHA;
6022 case QRhiGraphicsPipeline::OneMinusSrc1Color:
6023 return rgb ? D3D12_BLEND_INV_SRC1_COLOR : D3D12_BLEND_INV_SRC1_ALPHA;
6024 case QRhiGraphicsPipeline::Src1Alpha:
6025 return D3D12_BLEND_SRC1_ALPHA;
6026 case QRhiGraphicsPipeline::OneMinusSrc1Alpha:
6027 return D3D12_BLEND_INV_SRC1_ALPHA;
6028 }
6029 Q_UNREACHABLE_RETURN(D3D12_BLEND_ZERO);
6030}
6031
6032static inline D3D12_BLEND_OP toD3DBlendOp(QRhiGraphicsPipeline::BlendOp op)
6033{
6034 switch (op) {
6035 case QRhiGraphicsPipeline::Add:
6036 return D3D12_BLEND_OP_ADD;
6037 case QRhiGraphicsPipeline::Subtract:
6038 return D3D12_BLEND_OP_SUBTRACT;
6039 case QRhiGraphicsPipeline::ReverseSubtract:
6040 return D3D12_BLEND_OP_REV_SUBTRACT;
6041 case QRhiGraphicsPipeline::Min:
6042 return D3D12_BLEND_OP_MIN;
6043 case QRhiGraphicsPipeline::Max:
6044 return D3D12_BLEND_OP_MAX;
6045 }
6046 Q_UNREACHABLE_RETURN(D3D12_BLEND_OP_ADD);
6047}
6048
6049static inline D3D12_CULL_MODE toD3DCullMode(QRhiGraphicsPipeline::CullMode c)
6050{
6051 switch (c) {
6052 case QRhiGraphicsPipeline::None:
6053 return D3D12_CULL_MODE_NONE;
6054 case QRhiGraphicsPipeline::Front:
6055 return D3D12_CULL_MODE_FRONT;
6056 case QRhiGraphicsPipeline::Back:
6057 return D3D12_CULL_MODE_BACK;
6058 }
6059 Q_UNREACHABLE_RETURN(D3D12_CULL_MODE_NONE);
6060}
6061
6062static inline D3D12_FILL_MODE toD3DFillMode(QRhiGraphicsPipeline::PolygonMode mode)
6063{
6064 switch (mode) {
6065 case QRhiGraphicsPipeline::Fill:
6066 return D3D12_FILL_MODE_SOLID;
6067 case QRhiGraphicsPipeline::Line:
6068 return D3D12_FILL_MODE_WIREFRAME;
6069 }
6070 Q_UNREACHABLE_RETURN(D3D12_FILL_MODE_SOLID);
6071}
6072
6073static inline D3D12_COMPARISON_FUNC toD3DCompareOp(QRhiGraphicsPipeline::CompareOp op)
6074{
6075 switch (op) {
6076 case QRhiGraphicsPipeline::Never:
6077 return D3D12_COMPARISON_FUNC_NEVER;
6078 case QRhiGraphicsPipeline::Less:
6079 return D3D12_COMPARISON_FUNC_LESS;
6080 case QRhiGraphicsPipeline::Equal:
6081 return D3D12_COMPARISON_FUNC_EQUAL;
6082 case QRhiGraphicsPipeline::LessOrEqual:
6083 return D3D12_COMPARISON_FUNC_LESS_EQUAL;
6084 case QRhiGraphicsPipeline::Greater:
6085 return D3D12_COMPARISON_FUNC_GREATER;
6086 case QRhiGraphicsPipeline::NotEqual:
6087 return D3D12_COMPARISON_FUNC_NOT_EQUAL;
6088 case QRhiGraphicsPipeline::GreaterOrEqual:
6089 return D3D12_COMPARISON_FUNC_GREATER_EQUAL;
6090 case QRhiGraphicsPipeline::Always:
6091 return D3D12_COMPARISON_FUNC_ALWAYS;
6092 }
6093 Q_UNREACHABLE_RETURN(D3D12_COMPARISON_FUNC_ALWAYS);
6094}
6095
6096static inline D3D12_STENCIL_OP toD3DStencilOp(QRhiGraphicsPipeline::StencilOp op)
6097{
6098 switch (op) {
6099 case QRhiGraphicsPipeline::StencilZero:
6100 return D3D12_STENCIL_OP_ZERO;
6101 case QRhiGraphicsPipeline::Keep:
6102 return D3D12_STENCIL_OP_KEEP;
6103 case QRhiGraphicsPipeline::Replace:
6104 return D3D12_STENCIL_OP_REPLACE;
6105 case QRhiGraphicsPipeline::IncrementAndClamp:
6106 return D3D12_STENCIL_OP_INCR_SAT;
6107 case QRhiGraphicsPipeline::DecrementAndClamp:
6108 return D3D12_STENCIL_OP_DECR_SAT;
6109 case QRhiGraphicsPipeline::Invert:
6110 return D3D12_STENCIL_OP_INVERT;
6111 case QRhiGraphicsPipeline::IncrementAndWrap:
6112 return D3D12_STENCIL_OP_INCR;
6113 case QRhiGraphicsPipeline::DecrementAndWrap:
6114 return D3D12_STENCIL_OP_DECR;
6115 }
6116 Q_UNREACHABLE_RETURN(D3D12_STENCIL_OP_KEEP);
6117}
6118
6119static inline D3D12_PRIMITIVE_TOPOLOGY toD3DTopology(QRhiGraphicsPipeline::Topology t, int patchControlPointCount)
6120{
6121 switch (t) {
6122 case QRhiGraphicsPipeline::Triangles:
6123 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
6124 case QRhiGraphicsPipeline::TriangleStrip:
6125 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
6126 case QRhiGraphicsPipeline::TriangleFan:
6127 qWarning("Triangle fans are not supported with D3D");
6128 return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
6129 case QRhiGraphicsPipeline::Lines:
6130 return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
6131 case QRhiGraphicsPipeline::LineStrip:
6132 return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
6133 case QRhiGraphicsPipeline::Points:
6134 return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
6135 case QRhiGraphicsPipeline::Patches:
6136 Q_ASSERT(patchControlPointCount >= 1 && patchControlPointCount <= 32);
6137 return D3D_PRIMITIVE_TOPOLOGY(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (patchControlPointCount - 1));
6138 }
6139 Q_UNREACHABLE_RETURN(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
6140}
6141
6142static inline D3D12_PRIMITIVE_TOPOLOGY_TYPE toD3DTopologyType(QRhiGraphicsPipeline::Topology t)
6143{
6144 switch (t) {
6145 case QRhiGraphicsPipeline::Triangles:
6146 case QRhiGraphicsPipeline::TriangleStrip:
6147 case QRhiGraphicsPipeline::TriangleFan:
6148 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
6149 case QRhiGraphicsPipeline::Lines:
6150 case QRhiGraphicsPipeline::LineStrip:
6151 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
6152 case QRhiGraphicsPipeline::Points:
6153 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
6154 case QRhiGraphicsPipeline::Patches:
6155 return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
6156 }
6157 Q_UNREACHABLE_RETURN(D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE);
6158}
6159
6160static inline DXGI_FORMAT toD3DAttributeFormat(QRhiVertexInputAttribute::Format format)
6161{
6162 switch (format) {
6163 case QRhiVertexInputAttribute::Float4:
6164 return DXGI_FORMAT_R32G32B32A32_FLOAT;
6165 case QRhiVertexInputAttribute::Float3:
6166 return DXGI_FORMAT_R32G32B32_FLOAT;
6167 case QRhiVertexInputAttribute::Float2:
6168 return DXGI_FORMAT_R32G32_FLOAT;
6169 case QRhiVertexInputAttribute::Float:
6170 return DXGI_FORMAT_R32_FLOAT;
6171 case QRhiVertexInputAttribute::UNormByte4:
6172 return DXGI_FORMAT_R8G8B8A8_UNORM;
6173 case QRhiVertexInputAttribute::UNormByte2:
6174 return DXGI_FORMAT_R8G8_UNORM;
6175 case QRhiVertexInputAttribute::UNormByte:
6176 return DXGI_FORMAT_R8_UNORM;
6177 case QRhiVertexInputAttribute::UInt4:
6178 return DXGI_FORMAT_R32G32B32A32_UINT;
6179 case QRhiVertexInputAttribute::UInt3:
6180 return DXGI_FORMAT_R32G32B32_UINT;
6181 case QRhiVertexInputAttribute::UInt2:
6182 return DXGI_FORMAT_R32G32_UINT;
6183 case QRhiVertexInputAttribute::UInt:
6184 return DXGI_FORMAT_R32_UINT;
6185 case QRhiVertexInputAttribute::SInt4:
6186 return DXGI_FORMAT_R32G32B32A32_SINT;
6187 case QRhiVertexInputAttribute::SInt3:
6188 return DXGI_FORMAT_R32G32B32_SINT;
6189 case QRhiVertexInputAttribute::SInt2:
6190 return DXGI_FORMAT_R32G32_SINT;
6191 case QRhiVertexInputAttribute::SInt:
6192 return DXGI_FORMAT_R32_SINT;
6193 case QRhiVertexInputAttribute::Half4:
6194 // Note: D3D does not support half3. Pass through half3 as half4.
6195 case QRhiVertexInputAttribute::Half3:
6196 return DXGI_FORMAT_R16G16B16A16_FLOAT;
6197 case QRhiVertexInputAttribute::Half2:
6198 return DXGI_FORMAT_R16G16_FLOAT;
6199 case QRhiVertexInputAttribute::Half:
6200 return DXGI_FORMAT_R16_FLOAT;
6201 case QRhiVertexInputAttribute::UShort4:
6202 // Note: D3D does not support UShort3. Pass through UShort3 as UShort4.
6203 case QRhiVertexInputAttribute::UShort3:
6204 return DXGI_FORMAT_R16G16B16A16_UINT;
6205 case QRhiVertexInputAttribute::UShort2:
6206 return DXGI_FORMAT_R16G16_UINT;
6207 case QRhiVertexInputAttribute::UShort:
6208 return DXGI_FORMAT_R16_UINT;
6209 case QRhiVertexInputAttribute::SShort4:
6210 // Note: D3D does not support SShort3. Pass through SShort3 as SShort4.
6211 case QRhiVertexInputAttribute::SShort3:
6212 return DXGI_FORMAT_R16G16B16A16_SINT;
6213 case QRhiVertexInputAttribute::SShort2:
6214 return DXGI_FORMAT_R16G16_SINT;
6215 case QRhiVertexInputAttribute::SShort:
6216 return DXGI_FORMAT_R16_SINT;
6217 }
6218 Q_UNREACHABLE_RETURN(DXGI_FORMAT_R32G32B32A32_FLOAT);
6219}
6220
6221QD3D12GraphicsPipeline::QD3D12GraphicsPipeline(QRhiImplementation *rhi)
6222 : QRhiGraphicsPipeline(rhi)
6223{
6224}
6225
6226QD3D12GraphicsPipeline::~QD3D12GraphicsPipeline()
6227{
6228 destroy();
6229}
6230
6231void QD3D12GraphicsPipeline::destroy()
6232{
6233 if (handle.isNull())
6234 return;
6235
6236 QRHI_RES_RHI(QRhiD3D12);
6237 if (rhiD) {
6238 rhiD->releaseQueue.deferredReleasePipeline(handle);
6239 rhiD->releaseQueue.deferredReleaseRootSignature(rootSigHandle);
6240 }
6241
6242 handle = {};
6243 stageData = {};
6244
6245 if (rhiD)
6246 rhiD->unregisterResource(this);
6247}
6248
6249bool QD3D12GraphicsPipeline::create()
6250{
6251 if (!handle.isNull())
6252 destroy();
6253
6254 QRHI_RES_RHI(QRhiD3D12);
6255 if (!rhiD->sanityCheckGraphicsPipeline(this))
6256 return false;
6257
6258 rhiD->pipelineCreationStart();
6259
6260 QByteArray shaderBytecode[5];
6261 for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
6262 const QD3D12Stage d3dStage = qd3d12_stage(shaderStage.type());
6263 stageData[d3dStage].valid = true;
6264 stageData[d3dStage].stage = d3dStage;
6265 auto cacheIt = rhiD->shaderBytecodeCache.data.constFind(shaderStage);
6266 if (cacheIt != rhiD->shaderBytecodeCache.data.constEnd()) {
6267 shaderBytecode[d3dStage] = cacheIt->bytecode;
6268 stageData[d3dStage].nativeResourceBindingMap = cacheIt->nativeResourceBindingMap;
6269 } else {
6270 QString error;
6271 QShaderKey shaderKey;
6272 int compileFlags = 0;
6273 if (m_flags.testFlag(CompileShadersWithDebugInfo))
6274 compileFlags |= int(HlslCompileFlag::WithDebugInfo);
6275 const QByteArray bytecode = compileHlslShaderSource(shaderStage.shader(),
6276 shaderStage.shaderVariant(),
6277 compileFlags,
6278 &error,
6279 &shaderKey);
6280 if (bytecode.isEmpty()) {
6281 qWarning("HLSL graphics shader compilation failed: %s", qPrintable(error));
6282 return false;
6283 }
6284
6285 shaderBytecode[d3dStage] = bytecode;
6286 stageData[d3dStage].nativeResourceBindingMap = shaderStage.shader().nativeResourceBindingMap(shaderKey);
6287 rhiD->shaderBytecodeCache.insertWithCapacityLimit(shaderStage,
6288 { bytecode, stageData[d3dStage].nativeResourceBindingMap });
6289 }
6290 }
6291
6292 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, m_shaderResourceBindings);
6293 if (srbD) {
6294 rootSigHandle = srbD->createRootSignature(stageData.data(), 5);
6295 if (rootSigHandle.isNull()) {
6296 qWarning("Failed to create root signature");
6297 return false;
6298 }
6299 }
6300 ID3D12RootSignature *rootSig = nullptr;
6301 if (QD3D12RootSignature *rs = rhiD->rootSignaturePool.lookupRef(rootSigHandle))
6302 rootSig = rs->rootSig;
6303 if (!rootSig) {
6304 qWarning("Cannot create graphics pipeline state without root signature");
6305 return false;
6306 }
6307
6308 QD3D12RenderPassDescriptor *rpD = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
6309 DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;
6310 if (rpD->colorAttachmentCount > 0) {
6311 format = DXGI_FORMAT(rpD->colorFormat[0]);
6312 } else if (rpD->hasDepthStencil) {
6313 format = DXGI_FORMAT(rpD->dsFormat);
6314 } else {
6315 qWarning("Cannot create graphics pipeline state without color or depthStencil format");
6316 return false;
6317 }
6318 const DXGI_SAMPLE_DESC sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, format);
6319
6320 struct {
6321 QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig;
6322 QD3D12PipelineStateSubObject<D3D12_INPUT_LAYOUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT> inputLayout;
6323 QD3D12PipelineStateSubObject<D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE> primitiveRestartValue;
6324 QD3D12PipelineStateSubObject<D3D12_PRIMITIVE_TOPOLOGY_TYPE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY> primitiveTopology;
6325 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS> VS;
6326 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS> HS;
6327 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS> DS;
6328 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS> GS;
6329 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS> PS;
6330 QD3D12PipelineStateSubObject<D3D12_RASTERIZER_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER> rasterizerState;
6331 QD3D12PipelineStateSubObject<D3D12_DEPTH_STENCIL_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL> depthStencilState;
6332 QD3D12PipelineStateSubObject<D3D12_BLEND_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND> blendState;
6333 QD3D12PipelineStateSubObject<D3D12_RT_FORMAT_ARRAY, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS> rtFormats;
6334 QD3D12PipelineStateSubObject<DXGI_FORMAT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT> dsFormat;
6335 QD3D12PipelineStateSubObject<DXGI_SAMPLE_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC> sampleDesc;
6336 QD3D12PipelineStateSubObject<UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK> sampleMask;
6337 QD3D12PipelineStateSubObject<D3D12_VIEW_INSTANCING_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING> viewInstancingDesc;
6338 } stream;
6339
6340 stream.rootSig.object = rootSig;
6341
6342 QVarLengthArray<D3D12_INPUT_ELEMENT_DESC, 4> inputDescs;
6343 QByteArrayList matrixSliceSemantics;
6344 if (!shaderBytecode[VS].isEmpty()) {
6345 for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes();
6346 it != itEnd; ++it)
6347 {
6348 D3D12_INPUT_ELEMENT_DESC desc = {};
6349 // The output from SPIRV-Cross uses TEXCOORD<location> as the
6350 // semantic, except for matrices that are unrolled into consecutive
6351 // vec2/3/4s attributes and need TEXCOORD<location>_ as
6352 // SemanticName and row/column index as SemanticIndex.
6353 const int matrixSlice = it->matrixSlice();
6354 if (matrixSlice < 0) {
6355 desc.SemanticName = "TEXCOORD";
6356 desc.SemanticIndex = UINT(it->location());
6357 } else {
6358 QByteArray sem;
6359 sem.resize(16);
6360 std::snprintf(sem.data(), sem.size(), "TEXCOORD%d_", it->location() - matrixSlice);
6361 matrixSliceSemantics.append(sem);
6362 desc.SemanticName = matrixSliceSemantics.last().constData();
6363 desc.SemanticIndex = UINT(matrixSlice);
6364 }
6365 desc.Format = toD3DAttributeFormat(it->format());
6366 desc.InputSlot = UINT(it->binding());
6367 desc.AlignedByteOffset = it->offset();
6368 const QRhiVertexInputBinding *inputBinding = m_vertexInputLayout.bindingAt(it->binding());
6369 if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance) {
6370 desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
6371 desc.InstanceDataStepRate = inputBinding->instanceStepRate();
6372 } else {
6373 desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
6374 }
6375 inputDescs.append(desc);
6376 }
6377 }
6378
6379 stream.inputLayout.object.NumElements = inputDescs.count();
6380 stream.inputLayout.object.pInputElementDescs = inputDescs.isEmpty() ? nullptr : inputDescs.constData();
6381
6382 stream.primitiveRestartValue.object = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
6383
6384 stream.primitiveTopology.object = toD3DTopologyType(m_topology);
6385 topology = toD3DTopology(m_topology, m_patchControlPointCount);
6386
6387 for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
6388 const int d3dStage = qd3d12_stage(shaderStage.type());
6389 switch (d3dStage) {
6390 case VS:
6391 stream.VS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6392 stream.VS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6393 break;
6394 case HS:
6395 stream.HS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6396 stream.HS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6397 break;
6398 case DS:
6399 stream.DS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6400 stream.DS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6401 break;
6402 case GS:
6403 stream.GS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6404 stream.GS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6405 break;
6406 case PS:
6407 stream.PS.object.pShaderBytecode = shaderBytecode[d3dStage].constData();
6408 stream.PS.object.BytecodeLength = shaderBytecode[d3dStage].size();
6409 break;
6410 default:
6411 Q_UNREACHABLE();
6412 break;
6413 }
6414 }
6415
6416 stream.rasterizerState.object.FillMode = toD3DFillMode(m_polygonMode);
6417 stream.rasterizerState.object.CullMode = toD3DCullMode(m_cullMode);
6418 stream.rasterizerState.object.FrontCounterClockwise = m_frontFace == CCW;
6419 stream.rasterizerState.object.DepthBias = m_depthBias;
6420 stream.rasterizerState.object.SlopeScaledDepthBias = m_slopeScaledDepthBias;
6421 stream.rasterizerState.object.DepthClipEnable = m_depthClamp ? FALSE : TRUE;
6422 stream.rasterizerState.object.MultisampleEnable = sampleDesc.Count > 1;
6423
6424 stream.depthStencilState.object.DepthEnable = m_depthTest;
6425 stream.depthStencilState.object.DepthWriteMask = m_depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
6426 stream.depthStencilState.object.DepthFunc = toD3DCompareOp(m_depthOp);
6427 stream.depthStencilState.object.StencilEnable = m_stencilTest;
6428 if (m_stencilTest) {
6429 stream.depthStencilState.object.StencilReadMask = UINT8(m_stencilReadMask);
6430 stream.depthStencilState.object.StencilWriteMask = UINT8(m_stencilWriteMask);
6431 stream.depthStencilState.object.FrontFace.StencilFailOp = toD3DStencilOp(m_stencilFront.failOp);
6432 stream.depthStencilState.object.FrontFace.StencilDepthFailOp = toD3DStencilOp(m_stencilFront.depthFailOp);
6433 stream.depthStencilState.object.FrontFace.StencilPassOp = toD3DStencilOp(m_stencilFront.passOp);
6434 stream.depthStencilState.object.FrontFace.StencilFunc = toD3DCompareOp(m_stencilFront.compareOp);
6435 stream.depthStencilState.object.BackFace.StencilFailOp = toD3DStencilOp(m_stencilBack.failOp);
6436 stream.depthStencilState.object.BackFace.StencilDepthFailOp = toD3DStencilOp(m_stencilBack.depthFailOp);
6437 stream.depthStencilState.object.BackFace.StencilPassOp = toD3DStencilOp(m_stencilBack.passOp);
6438 stream.depthStencilState.object.BackFace.StencilFunc = toD3DCompareOp(m_stencilBack.compareOp);
6439 }
6440
6441 stream.blendState.object.IndependentBlendEnable = m_targetBlends.count() > 1;
6442 for (int i = 0, ie = m_targetBlends.count(); i != ie; ++i) {
6443 const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[i]);
6444 D3D12_RENDER_TARGET_BLEND_DESC blend = {};
6445 blend.BlendEnable = b.enable;
6446 blend.SrcBlend = toD3DBlendFactor(b.srcColor, true);
6447 blend.DestBlend = toD3DBlendFactor(b.dstColor, true);
6448 blend.BlendOp = toD3DBlendOp(b.opColor);
6449 blend.SrcBlendAlpha = toD3DBlendFactor(b.srcAlpha, false);
6450 blend.DestBlendAlpha = toD3DBlendFactor(b.dstAlpha, false);
6451 blend.BlendOpAlpha = toD3DBlendOp(b.opAlpha);
6452 blend.RenderTargetWriteMask = toD3DColorWriteMask(b.colorWrite);
6453 stream.blendState.object.RenderTarget[i] = blend;
6454 }
6455 if (m_targetBlends.isEmpty()) {
6456 D3D12_RENDER_TARGET_BLEND_DESC blend = {};
6457 blend.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
6458 stream.blendState.object.RenderTarget[0] = blend;
6459 }
6460
6461 stream.rtFormats.object.NumRenderTargets = rpD->colorAttachmentCount;
6462 for (int i = 0; i < rpD->colorAttachmentCount; ++i)
6463 stream.rtFormats.object.RTFormats[i] = DXGI_FORMAT(rpD->colorFormat[i]);
6464
6465 stream.dsFormat.object = rpD->hasDepthStencil ? DXGI_FORMAT(rpD->dsFormat) : DXGI_FORMAT_UNKNOWN;
6466
6467 stream.sampleDesc.object = sampleDesc;
6468
6469 stream.sampleMask.object = 0xFFFFFFFF;
6470
6471 viewInstanceMask = 0;
6472 const bool isMultiView = m_multiViewCount >= 2;
6473 stream.viewInstancingDesc.object.ViewInstanceCount = isMultiView ? m_multiViewCount : 0;
6474 QVarLengthArray<D3D12_VIEW_INSTANCE_LOCATION, 4> viewInstanceLocations;
6475 if (isMultiView) {
6476 for (int i = 0; i < m_multiViewCount; ++i) {
6477 viewInstanceMask |= (1 << i);
6478 viewInstanceLocations.append({ 0, UINT(i) });
6479 }
6480 stream.viewInstancingDesc.object.pViewInstanceLocations = viewInstanceLocations.constData();
6481 }
6482
6483 const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream };
6484
6485 ID3D12PipelineState *pso = nullptr;
6486 HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso));
6487 if (FAILED(hr)) {
6488 qWarning("Failed to create graphics pipeline state: %s",
6489 qPrintable(QSystemError::windowsComString(hr)));
6490 rhiD->rootSignaturePool.remove(rootSigHandle);
6491 rootSigHandle = {};
6492 return false;
6493 }
6494
6495 handle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Graphics, pso);
6496
6497 rhiD->pipelineCreationEnd();
6498 generation += 1;
6499 rhiD->registerResource(this);
6500 return true;
6501}
6502
6503QD3D12ComputePipeline::QD3D12ComputePipeline(QRhiImplementation *rhi)
6504 : QRhiComputePipeline(rhi)
6505{
6506}
6507
6508QD3D12ComputePipeline::~QD3D12ComputePipeline()
6509{
6510 destroy();
6511}
6512
6513void QD3D12ComputePipeline::destroy()
6514{
6515 if (handle.isNull())
6516 return;
6517
6518 QRHI_RES_RHI(QRhiD3D12);
6519 if (rhiD) {
6520 rhiD->releaseQueue.deferredReleasePipeline(handle);
6521 rhiD->releaseQueue.deferredReleaseRootSignature(rootSigHandle);
6522 }
6523
6524 handle = {};
6525 stageData = {};
6526
6527 if (rhiD)
6528 rhiD->unregisterResource(this);
6529}
6530
6531bool QD3D12ComputePipeline::create()
6532{
6533 if (!handle.isNull())
6534 destroy();
6535
6536 QRHI_RES_RHI(QRhiD3D12);
6537 rhiD->pipelineCreationStart();
6538
6539 stageData.valid = true;
6540 stageData.stage = CS;
6541
6542 QByteArray shaderBytecode;
6543 auto cacheIt = rhiD->shaderBytecodeCache.data.constFind(m_shaderStage);
6544 if (cacheIt != rhiD->shaderBytecodeCache.data.constEnd()) {
6545 shaderBytecode = cacheIt->bytecode;
6546 stageData.nativeResourceBindingMap = cacheIt->nativeResourceBindingMap;
6547 } else {
6548 QString error;
6549 QShaderKey shaderKey;
6550 int compileFlags = 0;
6551 if (m_flags.testFlag(CompileShadersWithDebugInfo))
6552 compileFlags |= int(HlslCompileFlag::WithDebugInfo);
6553 const QByteArray bytecode = compileHlslShaderSource(m_shaderStage.shader(),
6554 m_shaderStage.shaderVariant(),
6555 compileFlags,
6556 &error,
6557 &shaderKey);
6558 if (bytecode.isEmpty()) {
6559 qWarning("HLSL compute shader compilation failed: %s", qPrintable(error));
6560 return false;
6561 }
6562
6563 shaderBytecode = bytecode;
6564 stageData.nativeResourceBindingMap = m_shaderStage.shader().nativeResourceBindingMap(shaderKey);
6565 rhiD->shaderBytecodeCache.insertWithCapacityLimit(m_shaderStage, { bytecode,
6566 stageData.nativeResourceBindingMap });
6567 }
6568
6569 QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, m_shaderResourceBindings);
6570 if (srbD) {
6571 rootSigHandle = srbD->createRootSignature(&stageData, 1);
6572 if (rootSigHandle.isNull()) {
6573 qWarning("Failed to create root signature");
6574 return false;
6575 }
6576 }
6577 ID3D12RootSignature *rootSig = nullptr;
6578 if (QD3D12RootSignature *rs = rhiD->rootSignaturePool.lookupRef(rootSigHandle))
6579 rootSig = rs->rootSig;
6580 if (!rootSig) {
6581 qWarning("Cannot create compute pipeline state without root signature");
6582 return false;
6583 }
6584
6585 struct {
6586 QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig;
6587 QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS> CS;
6588 } stream;
6589 stream.rootSig.object = rootSig;
6590 stream.CS.object.pShaderBytecode = shaderBytecode.constData();
6591 stream.CS.object.BytecodeLength = shaderBytecode.size();
6592 const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream };
6593 ID3D12PipelineState *pso = nullptr;
6594 HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso));
6595 if (FAILED(hr)) {
6596 qWarning("Failed to create compute pipeline state: %s",
6597 qPrintable(QSystemError::windowsComString(hr)));
6598 rhiD->rootSignaturePool.remove(rootSigHandle);
6599 rootSigHandle = {};
6600 return false;
6601 }
6602
6603 handle = QD3D12Pipeline::addToPool(&rhiD->pipelinePool, QD3D12Pipeline::Compute, pso);
6604
6605 rhiD->pipelineCreationEnd();
6606 generation += 1;
6607 rhiD->registerResource(this);
6608 return true;
6609}
6610
// This is a lot like in the Metal backend: we need to know the rtv and dsv
// formats to create a graphics pipeline, and that's exactly what our
// "renderpass descriptor" is going to hold.
6614QD3D12RenderPassDescriptor::QD3D12RenderPassDescriptor(QRhiImplementation *rhi)
6615 : QRhiRenderPassDescriptor(rhi)
6616{
6617 serializedFormatData.reserve(16);
6618}
6619
6620QD3D12RenderPassDescriptor::~QD3D12RenderPassDescriptor()
6621{
6622 destroy();
6623}
6624
6625void QD3D12RenderPassDescriptor::destroy()
6626{
6627 QRHI_RES_RHI(QRhiD3D12);
6628 if (rhiD)
6629 rhiD->unregisterResource(this);
6630}
6631
6632bool QD3D12RenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const
6633{
6634 if (!other)
6635 return false;
6636
6637 const QD3D12RenderPassDescriptor *o = QRHI_RES(const QD3D12RenderPassDescriptor, other);
6638
6639 if (colorAttachmentCount != o->colorAttachmentCount)
6640 return false;
6641
6642 if (hasDepthStencil != o->hasDepthStencil)
6643 return false;
6644
6645 for (int i = 0; i < colorAttachmentCount; ++i) {
6646 if (colorFormat[i] != o->colorFormat[i])
6647 return false;
6648 }
6649
6650 if (hasDepthStencil) {
6651 if (dsFormat != o->dsFormat)
6652 return false;
6653 }
6654
6655 if (hasShadingRateMap != o->hasShadingRateMap)
6656 return false;
6657
6658 return true;
6659}
6660
6661void QD3D12RenderPassDescriptor::updateSerializedFormat()
6662{
6663 serializedFormatData.clear();
6664 auto p = std::back_inserter(serializedFormatData);
6665
6666 *p++ = colorAttachmentCount;
6667 *p++ = hasDepthStencil;
6668 for (int i = 0; i < colorAttachmentCount; ++i)
6669 *p++ = colorFormat[i];
6670 *p++ = hasDepthStencil ? dsFormat : 0;
6671}
6672
6673QRhiRenderPassDescriptor *QD3D12RenderPassDescriptor::newCompatibleRenderPassDescriptor() const
6674{
6675 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
6676 rpD->colorAttachmentCount = colorAttachmentCount;
6677 rpD->hasDepthStencil = hasDepthStencil;
6678 memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat));
6679 rpD->dsFormat = dsFormat;
6680 rpD->hasShadingRateMap = hasShadingRateMap;
6681
6682 rpD->updateSerializedFormat();
6683
6684 QRHI_RES_RHI(QRhiD3D12);
6685 rhiD->registerResource(rpD);
6686 return rpD;
6687}
6688
6689QVector<quint32> QD3D12RenderPassDescriptor::serializedFormat() const
6690{
6691 return serializedFormatData;
6692}
6693
6694QD3D12CommandBuffer::QD3D12CommandBuffer(QRhiImplementation *rhi)
6695 : QRhiCommandBuffer(rhi)
6696{
6697 resetState();
6698}
6699
6700QD3D12CommandBuffer::~QD3D12CommandBuffer()
6701{
6702 destroy();
6703}
6704
6705void QD3D12CommandBuffer::destroy()
6706{
6707 // nothing to do here, the command list is not owned by us
6708}
6709
6710const QRhiNativeHandles *QD3D12CommandBuffer::nativeHandles()
6711{
6712 nativeHandlesStruct.commandList = cmdList;
6713 return &nativeHandlesStruct;
6714}
6715
6716QD3D12SwapChainRenderTarget::QD3D12SwapChainRenderTarget(QRhiImplementation *rhi, QRhiSwapChain *swapchain)
6717 : QRhiSwapChainRenderTarget(rhi, swapchain),
6718 d(rhi)
6719{
6720}
6721
6722QD3D12SwapChainRenderTarget::~QD3D12SwapChainRenderTarget()
6723{
6724 destroy();
6725}
6726
6727void QD3D12SwapChainRenderTarget::destroy()
6728{
6729 // nothing to do here
6730}
6731
6732QSize QD3D12SwapChainRenderTarget::pixelSize() const
6733{
6734 return d.pixelSize;
6735}
6736
6737float QD3D12SwapChainRenderTarget::devicePixelRatio() const
6738{
6739 return d.dpr;
6740}
6741
6742int QD3D12SwapChainRenderTarget::sampleCount() const
6743{
6744 return d.sampleCount;
6745}
6746
6747QD3D12SwapChain::QD3D12SwapChain(QRhiImplementation *rhi)
6748 : QRhiSwapChain(rhi),
6749 rtWrapper(rhi, this),
6750 rtWrapperRight(rhi, this),
6751 cbWrapper(rhi)
6752{
6753}
6754
6755QD3D12SwapChain::~QD3D12SwapChain()
6756{
6757 destroy();
6758}
6759
6760void QD3D12SwapChain::destroy()
6761{
6762 if (!swapChain)
6763 return;
6764
6765 releaseBuffers();
6766
6767 swapChain->Release();
6768 swapChain = nullptr;
6769 sourceSwapChain1->Release();
6770 sourceSwapChain1 = nullptr;
6771
6772 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
6773 FrameResources &fr(frameRes[i]);
6774 if (fr.fence)
6775 fr.fence->Release();
6776 if (fr.fenceEvent)
6777 CloseHandle(fr.fenceEvent);
6778 if (fr.cmdList)
6779 fr.cmdList->Release();
6780 fr = {};
6781 }
6782
6783 if (dcompVisual) {
6784 dcompVisual->Release();
6785 dcompVisual = nullptr;
6786 }
6787
6788 if (dcompTarget) {
6789 dcompTarget->Release();
6790 dcompTarget = nullptr;
6791 }
6792
6793 if (frameLatencyWaitableObject) {
6794 CloseHandle(frameLatencyWaitableObject);
6795 frameLatencyWaitableObject = nullptr;
6796 }
6797
6798 QDxgiVSyncService::instance()->unregisterWindow(window);
6799
6800 QRHI_RES_RHI(QRhiD3D12);
6801 if (rhiD) {
6802 rhiD->swapchains.remove(this);
6803 rhiD->unregisterResource(this);
6804 }
6805}
6806
6807void QD3D12SwapChain::releaseBuffers()
6808{
6809 QRHI_RES_RHI(QRhiD3D12);
6810 rhiD->waitGpu();
6811 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
6812 rhiD->resourcePool.remove(colorBuffers[i]);
6813 rhiD->rtvPool.release(rtvs[i], 1);
6814 if (stereo)
6815 rhiD->rtvPool.release(rtvsRight[i], 1);
6816 if (!msaaBuffers[i].isNull())
6817 rhiD->resourcePool.remove(msaaBuffers[i]);
6818 if (msaaRtvs[i].isValid())
6819 rhiD->rtvPool.release(msaaRtvs[i], 1);
6820 }
6821}
6822
6823void QD3D12SwapChain::waitCommandCompletionForFrameSlot(int frameSlot)
6824{
6825 FrameResources &fr(frameRes[frameSlot]);
6826 if (fr.fence->GetCompletedValue() < fr.fenceCounter) {
6827 fr.fence->SetEventOnCompletion(fr.fenceCounter, fr.fenceEvent);
6828 WaitForSingleObject(fr.fenceEvent, INFINITE);
6829 }
6830}
6831
6832void QD3D12SwapChain::addCommandCompletionSignalForCurrentFrameSlot()
6833{
6834 QRHI_RES_RHI(QRhiD3D12);
6835 FrameResources &fr(frameRes[currentFrameSlot]);
6836 fr.fenceCounter += 1u;
6837 rhiD->cmdQueue->Signal(fr.fence, fr.fenceCounter);
6838}
6839
6840QRhiCommandBuffer *QD3D12SwapChain::currentFrameCommandBuffer()
6841{
6842 return &cbWrapper;
6843}
6844
6845QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget()
6846{
6847 return &rtWrapper;
6848}
6849
6850QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget(StereoTargetBuffer targetBuffer)
6851{
6852 return !stereo || targetBuffer == StereoTargetBuffer::LeftBuffer ? &rtWrapper : &rtWrapperRight;
6853}
6854
6855QSize QD3D12SwapChain::surfacePixelSize()
6856{
6857 Q_ASSERT(m_window);
6858 return m_window->size() * m_window->devicePixelRatio();
6859}
6860
6861bool QD3D12SwapChain::isFormatSupported(Format f)
6862{
6863 if (f == SDR)
6864 return true;
6865
6866 if (!m_window) {
6867 qWarning("Attempted to call isFormatSupported() without a window set");
6868 return false;
6869 }
6870
6871 QRHI_RES_RHI(QRhiD3D12);
6872 if (QDxgiHdrInfo(rhiD->activeAdapter).isHdrCapable(m_window))
6873 return f == QRhiSwapChain::HDRExtendedSrgbLinear || f == QRhiSwapChain::HDR10;
6874
6875 return false;
6876}
6877
6878QRhiSwapChainHdrInfo QD3D12SwapChain::hdrInfo()
6879{
6880 QRhiSwapChainHdrInfo info = QRhiSwapChain::hdrInfo();
6881 // Must use m_window, not window, given this may be called before createOrResize().
6882 if (m_window) {
6883 QRHI_RES_RHI(QRhiD3D12);
6884 info = QDxgiHdrInfo(rhiD->activeAdapter).queryHdrInfo(m_window);
6885 }
6886 return info;
6887}
6888
6889QRhiRenderPassDescriptor *QD3D12SwapChain::newCompatibleRenderPassDescriptor()
6890{
6891 // not yet built so cannot rely on data computed in createOrResize()
6892 chooseFormats();
6893
6894 QD3D12RenderPassDescriptor *rpD = new QD3D12RenderPassDescriptor(m_rhi);
6895 rpD->colorAttachmentCount = 1;
6896 rpD->hasDepthStencil = m_depthStencil != nullptr;
6897 rpD->colorFormat[0] = int(srgbAdjustedColorFormat);
6898 rpD->dsFormat = QD3D12RenderBuffer::DS_FORMAT;
6899
6900 rpD->hasShadingRateMap = m_shadingRateMap != nullptr;
6901
6902 rpD->updateSerializedFormat();
6903
6904 QRHI_RES_RHI(QRhiD3D12);
6905 rhiD->registerResource(rpD);
6906 return rpD;
6907}
6908
6909bool QRhiD3D12::ensureDirectCompositionDevice()
6910{
6911 if (dcompDevice)
6912 return true;
6913
6914 qCDebug(QRHI_LOG_INFO, "Creating Direct Composition device (needed for semi-transparent windows)");
6915 dcompDevice = QRhiD3D::createDirectCompositionDevice();
6916 return dcompDevice ? true : false;
6917}
6918
6919static const DXGI_FORMAT DEFAULT_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM;
6920static const DXGI_FORMAT DEFAULT_SRGB_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
6921
6922void QD3D12SwapChain::chooseFormats()
6923{
6924 colorFormat = DEFAULT_FORMAT;
6925 srgbAdjustedColorFormat = m_flags.testFlag(sRGB) ? DEFAULT_SRGB_FORMAT : DEFAULT_FORMAT;
6926 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // SDR
6927 QRHI_RES_RHI(QRhiD3D12);
6928 if (m_format != SDR) {
6929 if (QDxgiHdrInfo(rhiD->activeAdapter).isHdrCapable(m_window)) {
6930 // https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range
6931 switch (m_format) {
6932 case HDRExtendedSrgbLinear:
6933 colorFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
6934 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709;
6935 srgbAdjustedColorFormat = colorFormat;
6936 break;
6937 case HDR10:
6938 colorFormat = DXGI_FORMAT_R10G10B10A2_UNORM;
6939 hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020;
6940 srgbAdjustedColorFormat = colorFormat;
6941 break;
6942 default:
6943 break;
6944 }
6945 } else {
6946 // This happens also when Use HDR is set to Off in the Windows
6947 // Display settings. Show a helpful warning, but continue with the
6948 // default non-HDR format.
6949 qWarning("The output associated with the window is not HDR capable "
6950 "(or Use HDR is Off in the Display Settings), ignoring HDR format request");
6951 }
6952 }
6953 sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, colorFormat);
6954}
6955
6956bool QD3D12SwapChain::createOrResize()
6957{
6958 // Can be called multiple times due to window resizes - that is not the
6959 // same as a simple destroy+create (as with other resources). Just need to
6960 // resize the buffers then.
6961
6962 const bool needsRegistration = !window || window != m_window;
6963
6964 // except if the window actually changes
6965 if (window && window != m_window)
6966 destroy();
6967
6968 window = m_window;
6969 m_currentPixelSize = surfacePixelSize();
6970 pixelSize = m_currentPixelSize;
6971
6972 if (pixelSize.isEmpty())
6973 return false;
6974
6975 HWND hwnd = reinterpret_cast<HWND>(window->winId());
6976 HRESULT hr;
6977 QRHI_RES_RHI(QRhiD3D12);
6978 stereo = m_window->format().stereo() && rhiD->dxgiFactory->IsWindowedStereoEnabled();
6979
6980 if (m_flags.testFlag(SurfaceHasPreMulAlpha) || m_flags.testFlag(SurfaceHasNonPreMulAlpha)) {
6981 if (rhiD->ensureDirectCompositionDevice()) {
6982 if (!dcompTarget) {
6983 hr = rhiD->dcompDevice->CreateTargetForHwnd(hwnd, false, &dcompTarget);
6984 if (FAILED(hr)) {
6985 qWarning("Failed to create Direct Composition target for the window: %s",
6986 qPrintable(QSystemError::windowsComString(hr)));
6987 }
6988 }
6989 if (dcompTarget && !dcompVisual) {
6990 hr = rhiD->dcompDevice->CreateVisual(&dcompVisual);
6991 if (FAILED(hr)) {
6992 qWarning("Failed to create DirectComposition visual: %s",
6993 qPrintable(QSystemError::windowsComString(hr)));
6994 }
6995 }
6996 }
6997 // simple consistency check
6998 if (window->requestedFormat().alphaBufferSize() <= 0)
6999 qWarning("Swapchain says surface has alpha but the window has no alphaBufferSize set. "
7000 "This may lead to problems.");
7001 }
7002
7003 swapInterval = m_flags.testFlag(QRhiSwapChain::NoVSync) ? 0 : 1;
7004 swapChainFlags = 0;
7005 if (swapInterval == 0 && rhiD->supportsAllowTearing)
7006 swapChainFlags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
7007
7008 // maxFrameLatency 0 means no waitable object usage.
7009 // Ignore it also when NoVSync is on, and when using WARP.
7010 const bool useFrameLatencyWaitableObject = rhiD->maxFrameLatency != 0
7011 && swapInterval != 0
7012 && rhiD->driverInfoStruct.deviceType != QRhiDriverInfo::CpuDevice;
7013 if (useFrameLatencyWaitableObject)
7014 swapChainFlags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
7015
7016 if (!swapChain) {
7017 chooseFormats();
7018
7019 DXGI_SWAP_CHAIN_DESC1 desc = {};
7020 desc.Width = UINT(pixelSize.width());
7021 desc.Height = UINT(pixelSize.height());
7022 desc.Format = colorFormat;
7023 desc.SampleDesc.Count = 1;
7024 desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
7025 desc.BufferCount = BUFFER_COUNT;
7026 desc.Flags = swapChainFlags;
7027 desc.Scaling = DXGI_SCALING_NONE;
7028 desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
7029 desc.Stereo = stereo;
7030
7031 if (dcompVisual) {
7032 // With DirectComposition setting AlphaMode to STRAIGHT fails the
7033 // swapchain creation, whereas the result seems to be identical
7034 // with any of the other values, including IGNORE. (?)
7035 desc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
7036
7037 // DirectComposition has its own limitations, cannot use
7038 // SCALING_NONE. So with semi-transparency requested we are forced
7039 // to SCALING_STRETCH.
7040 desc.Scaling = DXGI_SCALING_STRETCH;
7041 }
7042
7043 if (dcompVisual)
7044 hr = rhiD->dxgiFactory->CreateSwapChainForComposition(rhiD->cmdQueue, &desc, nullptr, &sourceSwapChain1);
7045 else
7046 hr = rhiD->dxgiFactory->CreateSwapChainForHwnd(rhiD->cmdQueue, hwnd, &desc, nullptr, nullptr, &sourceSwapChain1);
7047
7048 // If failed and we tried a HDR format, then try with SDR. This
7049 // matches other backends, such as Vulkan where if the format is
7050 // not supported, the default one is used instead.
7051 if (FAILED(hr) && m_format != SDR) {
7052 colorFormat = DEFAULT_FORMAT;
7053 desc.Format = DEFAULT_FORMAT;
7054 if (dcompVisual)
7055 hr = rhiD->dxgiFactory->CreateSwapChainForComposition(rhiD->cmdQueue, &desc, nullptr, &sourceSwapChain1);
7056 else
7057 hr = rhiD->dxgiFactory->CreateSwapChainForHwnd(rhiD->cmdQueue, hwnd, &desc, nullptr, nullptr, &sourceSwapChain1);
7058 }
7059
7060 if (SUCCEEDED(hr)) {
7061 if (FAILED(sourceSwapChain1->QueryInterface(__uuidof(IDXGISwapChain3), reinterpret_cast<void **>(&swapChain)))) {
7062 qWarning("IDXGISwapChain3 not available");
7063 return false;
7064 }
7065 if (m_format != SDR) {
7066 hr = swapChain->SetColorSpace1(hdrColorSpace);
7067 if (FAILED(hr)) {
7068 qWarning("Failed to set color space on swapchain: %s",
7069 qPrintable(QSystemError::windowsComString(hr)));
7070 }
7071 }
7072 if (useFrameLatencyWaitableObject) {
7073 swapChain->SetMaximumFrameLatency(rhiD->maxFrameLatency);
7074 frameLatencyWaitableObject = swapChain->GetFrameLatencyWaitableObject();
7075 }
7076 if (dcompVisual) {
7077 hr = dcompVisual->SetContent(swapChain);
7078 if (SUCCEEDED(hr)) {
7079 hr = dcompTarget->SetRoot(dcompVisual);
7080 if (FAILED(hr)) {
7081 qWarning("Failed to associate Direct Composition visual with the target: %s",
7082 qPrintable(QSystemError::windowsComString(hr)));
7083 }
7084 } else {
7085 qWarning("Failed to set content for Direct Composition visual: %s",
7086 qPrintable(QSystemError::windowsComString(hr)));
7087 }
7088 } else {
7089 // disable Alt+Enter; not relevant when using DirectComposition
7090 rhiD->dxgiFactory->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES);
7091 }
7092 }
7093 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
7094 qWarning("Device loss detected during swapchain creation");
7095 rhiD->deviceLost = true;
7096 return false;
7097 } else if (FAILED(hr)) {
7098 qWarning("Failed to create D3D12 swapchain: %s"
7099 " (Width=%u Height=%u Format=%u SampleCount=%u BufferCount=%u Scaling=%u SwapEffect=%u Stereo=%u)",
7100 qPrintable(QSystemError::windowsComString(hr)),
7101 desc.Width, desc.Height, UINT(desc.Format), desc.SampleDesc.Count,
7102 desc.BufferCount, UINT(desc.Scaling), UINT(desc.SwapEffect), UINT(desc.Stereo));
7103 return false;
7104 }
7105
7106 for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) {
7107 hr = rhiD->dev->CreateFence(0,
7108 D3D12_FENCE_FLAG_NONE,
7109 __uuidof(ID3D12Fence),
7110 reinterpret_cast<void **>(&frameRes[i].fence));
7111 if (FAILED(hr)) {
7112 qWarning("Failed to create fence for swapchain: %s",
7113 qPrintable(QSystemError::windowsComString(hr)));
7114 return false;
7115 }
7116 frameRes[i].fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
7117
7118 frameRes[i].fenceCounter = 0;
7119 }
7120 } else {
7121 releaseBuffers();
7122 hr = swapChain->ResizeBuffers(BUFFER_COUNT,
7123 UINT(pixelSize.width()),
7124 UINT(pixelSize.height()),
7125 colorFormat,
7126 swapChainFlags);
7127 if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) {
7128 qWarning("Device loss detected in ResizeBuffers()");
7129 rhiD->deviceLost = true;
7130 return false;
7131 } else if (FAILED(hr)) {
7132 qWarning("Failed to resize D3D12 swapchain: %s", qPrintable(QSystemError::windowsComString(hr)));
7133 return false;
7134 }
7135 }
7136
7137 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
7138 ID3D12Resource *colorBuffer;
7139 hr = swapChain->GetBuffer(i, __uuidof(ID3D12Resource), reinterpret_cast<void **>(&colorBuffer));
7140 if (FAILED(hr)) {
7141 qWarning("Failed to get buffer %u for D3D12 swapchain: %s",
7142 i, qPrintable(QSystemError::windowsComString(hr)));
7143 return false;
7144 }
7145 colorBuffers[i] = QD3D12Resource::addToPool(&rhiD->resourcePool, colorBuffer, D3D12_RESOURCE_STATE_PRESENT);
7146 rtvs[i] = rhiD->rtvPool.allocate(1);
7147 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7148 rtvDesc.Format = srgbAdjustedColorFormat;
7149 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
7150 rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvs[i].cpuHandle);
7151
7152 if (stereo) {
7153 rtvsRight[i] = rhiD->rtvPool.allocate(1);
7154 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7155 rtvDesc.Format = srgbAdjustedColorFormat;
7156 rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
7157 rtvDesc.Texture2DArray.ArraySize = 1;
7158 rtvDesc.Texture2DArray.FirstArraySlice = 1;
7159 rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvsRight[i].cpuHandle);
7160 }
7161 }
7162
7163 if (m_depthStencil && m_depthStencil->sampleCount() != m_sampleCount) {
7164 qWarning("Depth-stencil buffer's sampleCount (%d) does not match color buffers' sample count (%d). Expect problems.",
7165 m_depthStencil->sampleCount(), m_sampleCount);
7166 }
7167 if (m_depthStencil && m_depthStencil->pixelSize() != pixelSize) {
7168 if (m_depthStencil->flags().testFlag(QRhiRenderBuffer::UsedWithSwapChainOnly)) {
7169 m_depthStencil->setPixelSize(pixelSize);
7170 if (!m_depthStencil->create())
7171 qWarning("Failed to rebuild swapchain's associated depth-stencil buffer for size %dx%d",
7172 pixelSize.width(), pixelSize.height());
7173 } else {
7174 qWarning("Depth-stencil buffer's size (%dx%d) does not match the surface size (%dx%d). Expect problems.",
7175 m_depthStencil->pixelSize().width(), m_depthStencil->pixelSize().height(),
7176 pixelSize.width(), pixelSize.height());
7177 }
7178 }
7179
7180 ds = m_depthStencil ? QRHI_RES(QD3D12RenderBuffer, m_depthStencil) : nullptr;
7181
7182 if (sampleDesc.Count > 1) {
7183 for (UINT i = 0; i < BUFFER_COUNT; ++i) {
7184 D3D12_RESOURCE_DESC resourceDesc = {};
7185 resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
7186 resourceDesc.Width = UINT64(pixelSize.width());
7187 resourceDesc.Height = UINT(pixelSize.height());
7188 resourceDesc.DepthOrArraySize = 1;
7189 resourceDesc.MipLevels = 1;
7190 resourceDesc.Format = srgbAdjustedColorFormat;
7191 resourceDesc.SampleDesc = sampleDesc;
7192 resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
7193 resourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
7194 D3D12_CLEAR_VALUE clearValue = {};
7195 clearValue.Format = colorFormat;
7196 ID3D12Resource *resource = nullptr;
7197 D3D12MA::Allocation *allocation = nullptr;
7198 HRESULT hr = rhiD->vma.createResource(D3D12_HEAP_TYPE_DEFAULT,
7199 &resourceDesc,
7200 D3D12_RESOURCE_STATE_RENDER_TARGET,
7201 &clearValue,
7202 &allocation,
7203 __uuidof(ID3D12Resource),
7204 reinterpret_cast<void **>(&resource));
7205 if (FAILED(hr)) {
7206 qWarning("Failed to create MSAA color buffer: %s", qPrintable(QSystemError::windowsComString(hr)));
7207 return false;
7208 }
7209 msaaBuffers[i] = QD3D12Resource::addToPool(&rhiD->resourcePool, resource, D3D12_RESOURCE_STATE_RENDER_TARGET, allocation);
7210 msaaRtvs[i] = rhiD->rtvPool.allocate(1);
7211 if (!msaaRtvs[i].isValid())
7212 return false;
7213 D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {};
7214 rtvDesc.Format = srgbAdjustedColorFormat;
7215 rtvDesc.ViewDimension = sampleDesc.Count > 1 ? D3D12_RTV_DIMENSION_TEXTURE2DMS
7216 : D3D12_RTV_DIMENSION_TEXTURE2D;
7217 rhiD->dev->CreateRenderTargetView(resource, &rtvDesc, msaaRtvs[i].cpuHandle);
7218 }
7219 }
7220
7221 currentBackBufferIndex = swapChain->GetCurrentBackBufferIndex();
7222 currentFrameSlot = 0;
7223 lastFrameLatencyWaitSlot = -1; // wait already in the first frame, as instructed in the dxgi docs
7224
7225 rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget
7226 QD3D12SwapChainRenderTarget *rtD = QRHI_RES(QD3D12SwapChainRenderTarget, &rtWrapper);
7227 rtD->d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
7228 rtD->d.pixelSize = pixelSize;
7229 rtD->d.dpr = float(window->devicePixelRatio());
7230 rtD->d.sampleCount = int(sampleDesc.Count);
7231 rtD->d.colorAttCount = 1;
7232 rtD->d.dsAttCount = m_depthStencil ? 1 : 0;
7233
7234 rtWrapperRight.setRenderPassDescriptor(m_renderPassDesc);
7235 QD3D12SwapChainRenderTarget *rtDr = QRHI_RES(QD3D12SwapChainRenderTarget, &rtWrapperRight);
7236 rtDr->d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc);
7237 rtDr->d.pixelSize = pixelSize;
7238 rtDr->d.dpr = float(window->devicePixelRatio());
7239 rtDr->d.sampleCount = int(sampleDesc.Count);
7240 rtDr->d.colorAttCount = 1;
7241 rtDr->d.dsAttCount = m_depthStencil ? 1 : 0;
7242
7243 QDxgiVSyncService::instance()->registerWindow(window);
7244
7245 if (needsRegistration || !rhiD->swapchains.contains(this))
7246 rhiD->swapchains.insert(this);
7247
7248 rhiD->registerResource(this);
7249
7250 return true;
7251}
7252
7253QT_END_NAMESPACE
7254
7255#endif // __ID3D12Device2_INTERFACE_DEFINED__
#define __has_include(x)