Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qrhimetal.mm
Go to the documentation of this file.
1// Copyright (C) 2023 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qrhimetal_p.h"
5#include "qshader_p.h"
6#include <QGuiApplication>
7#include <QWindow>
8#include <QUrl>
9#include <QFile>
10#include <QTemporaryFile>
11#include <QFileInfo>
12#include <qmath.h>
13#include <QOperatingSystemVersion>
14
15#include <QtCore/private/qcore_mac_p.h>
16#include <QtGui/private/qmetallayer_p.h>
17
18#ifdef Q_OS_MACOS
19#include <AppKit/AppKit.h>
20#else
21#include <UIKit/UIKit.h>
22#endif
23
24#include <QuartzCore/CATransaction.h>
25
26#include <Metal/Metal.h>
27
29
30/*
31 Metal backend. Double buffers and throttles to vsync. "Dynamic" buffers are
32 Shared (host visible) and duplicated (to help having 2 frames in flight),
33 "static" and "immutable" are Managed on macOS and Shared on iOS/tvOS.
34 Textures are Private (device local) and a host visible staging buffer is
35 used to upload data to them. Does not rely on strong objects refs from
36 command buffers but does rely on the automatic resource tracking of the
37 command encoders. Assumes that an autorelease pool (ideally per frame) is
38 available on the thread on which QRhi is used.
39*/
40
41#if __has_feature(objc_arc)
42#error ARC not supported
43#endif
44
45// Even though the macOS 13 MTLBinaryArchive problem (QTBUG-106703) seems
46// to be solved in later 13.x releases, we have reports from old Intel hardware
47// and older macOS versions where this causes problems (QTBUG-114338).
48// Thus we no longer do OS version based differentiation, but rather have a
49// single toggle that is currently on, and so QRhi::(set)pipelineCache()
50// does nothing with Metal.
51#define QRHI_METAL_DISABLE_BINARY_ARCHIVE
52
53// We should be able to operate with command buffers that do not automatically
54// retain/release the resources used by them. (since we have logic that mirrors
55// other backends such as the Vulkan one anyway)
56#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
57
58/*!
59 \class QRhiMetalInitParams
60 \inmodule QtGuiPrivate
61 \inheaderfile rhi/qrhi.h
62 \since 6.6
63 \brief Metal specific initialization parameters.
64
65 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
66 for details.
67
68 A Metal-based QRhi needs no special parameters for initialization.
69
70 \badcode
71 QRhiMetalInitParams params;
72 rhi = QRhi::create(QRhi::Metal, &params);
73 \endcode
74
75 \note Metal API validation cannot be enabled programmatically by the QRhi.
76 Instead, either run the debug build of the application in Xcode, by
77 generating a \c{.xcodeproj} file via \c{cmake -G Xcode}, or set the
78 environment variable \c{METAL_DEVICE_WRAPPER_TYPE=1}. The variable needs to
79 be set early on in the environment, preferably before starting the process;
80 attempting to set it at QRhi creation time is not functional in practice
81 (presumably because it is already too late by then).
82
83 \note QRhiSwapChain can only target QWindow instances that have their
84 surface type set to QSurface::MetalSurface.
85
86 \section2 Working with existing Metal devices
87
88 When interoperating with another graphics engine, it may be necessary to
89 get a QRhi instance that uses the same Metal device. This can be achieved
90 by passing a pointer to a QRhiMetalNativeHandles to QRhi::create(). The
91 device must be set to a non-null value then. Optionally, a command queue
92 object can be specified as well.
93
94 The QRhi does not take ownership of any of the external objects.
95 */
96
97/*!
98 \class QRhiMetalNativeHandles
99 \inmodule QtGuiPrivate
100 \inheaderfile rhi/qrhi.h
101 \since 6.6
102 \brief Holds the Metal device used by the QRhi.
103
104 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
105 for details.
106 */
107
108/*!
109 \variable QRhiMetalNativeHandles::dev
110
111 Set to a valid MTLDevice to import an existing device.
112*/
113
114/*!
115 \variable QRhiMetalNativeHandles::cmdQueue
116
117 Set to a valid MTLCommandQueue when importing an existing command queue.
118 When \nullptr, QRhi will create a new command queue.
119*/
120
121/*!
122 \class QRhiMetalCommandBufferNativeHandles
123 \inmodule QtGuiPrivate
124 \inheaderfile rhi/qrhi.h
125 \since 6.6
126 \brief Holds the MTLCommandBuffer and MTLRenderCommandEncoder objects that are backing a QRhiCommandBuffer.
127
128 \note The command buffer object is only guaranteed to be valid while
129 recording a frame, that is, between a \l{QRhi::beginFrame()}{beginFrame()}
130 - \l{QRhi::endFrame()}{endFrame()} or
131 \l{QRhi::beginOffscreenFrame()}{beginOffscreenFrame()} -
132 \l{QRhi::endOffscreenFrame()}{endOffscreenFrame()} pair.
133
134 \note The command encoder is only valid while recording a pass, that is,
135 between \l{QRhiCommandBuffer::beginPass()} -
136 \l{QRhiCommandBuffer::endPass()}.
137
138 \note This is a RHI API with limited compatibility guarantees, see \l QRhi
139 for details.
140 */
141
142/*!
143 \variable QRhiMetalCommandBufferNativeHandles::commandBuffer
144*/
145
146/*!
147 \variable QRhiMetalCommandBufferNativeHandles::encoder
148*/
149
151{
154 std::array<uint, 3> localSize = {};
159
160 void destroy() {
161 nativeResourceBindingMap.clear();
162 [lib release];
163 lib = nil;
164 [func release];
165 func = nil;
166 }
167};
168
170{
171 QRhiMetalData(QRhiMetal *rhi) : q(rhi), ofr(rhi) { }
172
176 API_AVAILABLE(macosx(11.0), ios(14.0)) id<MTLBinaryArchive> binArch = nil;
177
180 const QColor &colorClearValue,
181 const QRhiDepthStencilClearValue &depthStencilClearValue,
182 int colorAttCount,
183 QRhiShadingRateMap *shadingRateMap);
184 id<MTLLibrary> createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
185 QString *error, QByteArray *entryPoint, QShaderKey *activeKey);
186 id<MTLFunction> createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint);
187 bool setupBinaryArchive(NSURL *sourceFileUrl = nil);
188 void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
189 void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
190 void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
191 void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
192
205 int lastActiveFrameSlot; // -1 if not used otherwise 0..FRAMES_IN_FLIGHT-1
206 union {
207 struct {
209 } buffer;
210 struct {
212 } renderbuffer;
213 struct {
214 id<MTLTexture> texture;
216 id<MTLTexture> views[QRhi::MAX_MIP_LEVELS];
217 } texture;
218 struct {
220 } sampler;
221 struct {
223 } stagingBuffer;
224 struct {
229 } graphicsPipeline;
230 struct {
232 } computePipeline;
233 struct {
235 } shadingRateMap;
236 };
237 };
239
241 OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { }
242 bool active = false;
243 double lastGpuTime = 0;
245 } ofr;
246
257
266
268
271
272 static const int TEXBUF_ALIGN = 256; // probably not accurate
273
275};
276
279
291
297
311
316
321
347
362
395
397{
420 bool enabled = false;
421 bool failed = false;
429 quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const
430 {
431 // max vertex output components = resourceLimit(MaxVertexOutputs) * 4 = 60
432 return vertexOrIndexCount * instanceCount * sizeof(float) * 60;
433 }
434 quint32 tescCompOutputBufferSize(quint32 patchCount) const
435 {
436 return outControlPointCount * patchCount * sizeof(float) * 60;
437 }
438 quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const
439 {
440 // assume maxTessellationControlPerPatchOutputComponents is 128
441 return patchCount * sizeof(float) * 128;
442 }
443 quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const
444 {
445 return ((vertexOrIndexCount + inControlPointCount - 1) / inControlPointCount) * instanceCount;
446 }
451 } tess;
452 void setupVertexInputDescriptor(MTLVertexDescriptor *desc);
453 void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc);
454
455 // SPIRV-Cross buffer size buffers
457};
458
460{
464
465 // SPIRV-Cross buffer size buffers
467};
468
480
// Sets up the backend's private data and records whether an externally owned
// MTLDevice (and optionally an MTLCommandQueue) is going to be used instead
// of objects created by the backend itself. \a params is currently unused.
// Nothing is retained here; the pointers are only stored.
QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice)
{
    Q_UNUSED(params);

    d = new QRhiMetalData(this);

    // No native handles given: the backend works with its own device.
    if (!importDevice) {
        importedDevice = false;
        return;
    }

    // Handles were given but without a device: there is nothing to import.
    if (!importDevice->dev) {
        qWarning("No MTLDevice given, cannot import");
        importedDevice = false;
        return;
    }

    importedDevice = true;
    d->dev = (id<MTLDevice>) importDevice->dev;

    // The command queue is optional even when a device is imported.
    importedCmdQueue = importDevice->cmdQueue != nullptr;
    if (importedCmdQueue)
        d->cmdQueue = (id<MTLCommandQueue>) importDevice->cmdQueue;
}
500
502{
503 delete d;
504}
505
// Rounds v up to a multiple of byteAlign using a bit mask. The mask trick
// only produces a multiple when byteAlign is a power of two.
template <class Int>
inline Int aligned(Int v, Int byteAlign)
{
    const Int mask = byteAlign - 1;
    return (v + mask) & ~mask;
}
511
// Reports whether a system default MTLDevice can be obtained. \a params is
// not used. The device created for the check is released before returning.
bool QRhiMetal::probe(QRhiMetalInitParams *params)
{
    Q_UNUSED(params);

    id<MTLDevice> defaultDevice = MTLCreateSystemDefaultDevice();
    const bool deviceAvailable = defaultDevice != nil;

    // Messaging nil is a no-op, so this is fine when no device was found.
    [defaultDevice release];

    return deviceAvailable;
}
522
524{
526 // Do not let the command buffer mess with the refcount of objects. We do
527 // have a proper render loop and will manage lifetimes similarly to other
528 // backends (Vulkan).
529 return [cmdQueue commandBufferWithUnretainedReferences];
530#else
531 return [cmdQueue commandBuffer];
532#endif
533}
534
// (Re)creates the MTLBinaryArchive stored in binArch. When \a sourceFileUrl
// is non-nil it is set as the url of the archive descriptor; with nil the
// descriptor is left without a url.
//
// Returns true when an archive object was created. Returns false when
// archive creation failed, or unconditionally when
// QRHI_METAL_DISABLE_BINARY_ARCHIVE is defined.
bool QRhiMetalData::setupBinaryArchive(NSURL *sourceFileUrl)
{
    // Compile-time kill switch: with the toggle defined, no archive is ever
    // created and everything below is intentionally unreachable.
#ifdef QRHI_METAL_DISABLE_BINARY_ARCHIVE
    return false;
#endif

    // Drop the previous archive (no-op when nil); binArch is reassigned below
    // in every case, so no dangling pointer is left behind.
    [binArch release];
    MTLBinaryArchiveDescriptor *binArchDesc = [MTLBinaryArchiveDescriptor new];
    binArchDesc.url = sourceFileUrl;
    NSError *err = nil;
    binArch = [dev newBinaryArchiveWithDescriptor: binArchDesc error: &err];
    [binArchDesc release];
    if (!binArch) {
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("newBinaryArchiveWithDescriptor failed: %s", qPrintable(msg));
        return false;
    }
    return true;
}
554
// Initializes the backend: acquires (or retains the imported) MTLDevice and
// MTLCommandQueue, fills in the driver info, sets up the capture scope,
// determines the platform/GPU family dependent capabilities and, when
// requested via \a flags, the binary archive used for pipeline cache data.
//
// Returns false when no device or no command queue is available.
bool QRhiMetal::create(QRhi::Flags flags)
{
    rhiFlags = flags;

    // An imported device is owned by someone else; take our own reference so
    // that the release in destroy() is balanced.
    if (importedDevice)
        [d->dev retain];
    else
        d->dev = MTLCreateSystemDefaultDevice();

    if (!d->dev) {
        qWarning("No MTLDevice");
        return false;
    }

    const QString deviceName = QString::fromNSString([d->dev name]);
    qCDebug(QRHI_LOG_INFO, "Metal device: %s", qPrintable(deviceName));
    driverInfoStruct.deviceName = deviceName.toUtf8();

    // deviceId and vendorId stay unset for now. Note that registryID is not
    // suitable as deviceId because it does not seem stable on macOS and can
    // apparently change when the system is rebooted.

#ifdef Q_OS_MACOS
    const MTLDeviceLocation deviceLocation = [d->dev location];
    switch (deviceLocation) {
    case MTLDeviceLocationBuiltIn:
        driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
        break;
    case MTLDeviceLocationSlot:
        driverInfoStruct.deviceType = QRhiDriverInfo::DiscreteDevice;
        break;
    case MTLDeviceLocationExternal:
        driverInfoStruct.deviceType = QRhiDriverInfo::ExternalDevice;
        break;
    default:
        // unspecified location: deviceType keeps its current value
        break;
    }
#else
    driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
#endif

    // Remembered so that pipeline cache blobs can be tied to the OS version.
    const QOperatingSystemVersion ver = QOperatingSystemVersion::current();
    osMajor = ver.majorVersion();
    osMinor = ver.minorVersion();

    if (importedCmdQueue)
        [d->cmdQueue retain];
    else
        d->cmdQueue = [d->dev newCommandQueue];

    // Queue creation can fail. Do not report success with a nil queue, since
    // everything recorded later would silently go nowhere.
    // NOTE(review): the device reference taken above is not dropped here; it
    // is released by destroy(), which is assumed to still run after a failed
    // create() - confirm against the caller.
    if (!d->cmdQueue) {
        qWarning("Failed to create MTLCommandQueue");
        return false;
    }

    d->captureMgr = [MTLCaptureManager sharedCaptureManager];
    // Have a custom capture scope as well which then shows up in Xcode as
    // an option when capturing, and becomes especially useful when having
    // multiple windows with multiple QRhis.
    d->captureScope = [d->captureMgr newCaptureScopeWithCommandQueue: d->cmdQueue];
    const QString label = QString::asprintf("Qt capture scope for QRhi %p", this);
    d->captureScope.label = label.toNSString();

#if defined(Q_OS_MACOS) || defined(Q_OS_VISIONOS)
    caps.maxTextureSize = 16384;
    caps.baseVertexAndInstance = true;
    caps.isAppleGPU = [d->dev supportsFamily:MTLGPUFamilyApple7];
    caps.maxThreadGroupSize = 1024;
    caps.multiView = true;
#elif defined(Q_OS_TVOS)
    if ([d->dev supportsFamily:MTLGPUFamilyApple3])
        caps.maxTextureSize = 16384;
    else
        caps.maxTextureSize = 8192;
    caps.baseVertexAndInstance = false;
    caps.isAppleGPU = true;
#elif defined(Q_OS_IOS)
    if ([d->dev supportsFamily:MTLGPUFamilyApple3]) {
        caps.maxTextureSize = 16384;
        caps.baseVertexAndInstance = true;
    } else if ([d->dev supportsFamily:MTLGPUFamilyApple2]) {
        caps.maxTextureSize = 8192;
        caps.baseVertexAndInstance = false;
    } else {
        caps.maxTextureSize = 4096;
        caps.baseVertexAndInstance = false;
    }
    caps.isAppleGPU = true;
    if ([d->dev supportsFamily:MTLGPUFamilyApple4])
        caps.maxThreadGroupSize = 1024;
    if ([d->dev supportsFamily:MTLGPUFamilyApple5])
        caps.multiView = true;
#endif

    // A sample count of 1 is always listed; the rest is queried.
    caps.supportedSampleCounts = { 1 };
    for (int sampleCount : { 2, 4, 8 }) {
        if ([d->dev supportsTextureSampleCount: sampleCount])
            caps.supportedSampleCounts.append(sampleCount);
    }

    // With multiview in use, rate maps are required to work with 2 layers too.
    caps.shadingRateMap = [d->dev supportsRasterizationRateMapWithLayerCount: 1];
    if (caps.shadingRateMap && caps.multiView)
        caps.shadingRateMap = [d->dev supportsRasterizationRateMapWithLayerCount: 2];

    if (rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
        d->setupBinaryArchive();

    nativeHandlesStruct.dev = (MTLDevice *) d->dev;
    nativeHandlesStruct.cmdQueue = (MTLCommandQueue *) d->cmdQueue;

    return true;
}
662
// Tears down what create() set up: flushes pending releases and readbacks,
// empties the shader cache, and releases the capture scope, binary archive,
// command queue and device.
void QRhiMetal::destroy()
{
    executeDeferredReleases(true);
    finishActiveReadbacks(true);

    for (QMetalShader &cachedShader : d->shaderCache)
        cachedShader.destroy();
    d->shaderCache.clear();

    [d->captureScope release];
    d->captureScope = nil;

    [d->binArch release];
    d->binArch = nil;

    // The reference taken in create() is always dropped, but the pointer to
    // an imported queue/device is kept; only backend-created objects are
    // forgotten.
    [d->cmdQueue release];
    if (!importedCmdQueue)
        d->cmdQueue = nil;

    [d->dev release];
    if (!importedDevice)
        d->dev = nil;
}
686
// Returns the sample counts stored in the capability data.
QVector<int> QRhiMetal::supportedSampleCounts() const
{
    const QVector<int> &sampleCounts = caps.supportedSampleCounts;
    return sampleCounts;
}
691
// Only the 1x1 shading rate is reported, regardless of \a sampleCount.
QVector<QSize> QRhiMetal::supportedShadingRates(int sampleCount) const
{
    Q_UNUSED(sampleCount);

    QVector<QSize> rates;
    rates.append(QSize(1, 1));
    return rates;
}
697
// Factory for the Metal swapchain implementation; the caller receives a
// newly allocated object.
QRhiSwapChain *QRhiMetal::createSwapChain()
{
    QMetalSwapChain *swapChain = new QMetalSwapChain(this);
    return swapChain;
}
702
// Factory for the Metal buffer implementation; all arguments are forwarded
// unchanged to the QMetalBuffer constructor.
QRhiBuffer *QRhiMetal::createBuffer(QRhiBuffer::Type type,
                                    QRhiBuffer::UsageFlags usage,
                                    quint32 size)
{
    QMetalBuffer *buffer = new QMetalBuffer(this, type, usage, size);
    return buffer;
}
707
// Uniform buffer alignment reported by this backend: a fixed 256.
int QRhiMetal::ubufAlignment() const
{
    const int uniformBufferAlignment = 256;
    return uniformBufferAlignment;
}
712
// This backend reports a framebuffer coordinate system that is not Y-up.
bool QRhiMetal::isYUpInFramebuffer() const
{
    const bool yUp = false;
    return yUp;
}
717
// This backend reports Y-up normalized device coordinates.
bool QRhiMetal::isYUpInNDC() const
{
    const bool yUp = true;
    return yUp;
}
722
// This backend reports a clip space depth range of 0..1.
bool QRhiMetal::isClipDepthZeroToOne() const
{
    const bool zeroToOne = true;
    return zeroToOne;
}
727
// Returns the correction matrix for the 0..1 depth range: x, y and w are
// left untouched while z becomes 0.5 * z + 0.5 * w.
//
// The matrix is a const function-local static, so it is built exactly once
// and its initialization is safe even when several threads (each with their
// own QRhi) call this concurrently. The previous "initialize when still
// identity" scheme wrote to a shared static without synchronization.
QMatrix4x4 QRhiMetal::clipSpaceCorrMatrix() const
{
    // NB the ctor takes row-major
    static const QMatrix4x4 m(1.0f, 0.0f, 0.0f, 0.0f,
                              0.0f, 1.0f, 0.0f, 0.0f,
                              0.0f, 0.0f, 0.5f, 0.5f,
                              0.0f, 0.0f, 0.0f, 1.0f);
    return m;
}
741
// Tells whether \a format can be used for textures. \a flags is ignored.
// Everything is reported as supported except BC5, the ETC2/ASTC ranges
// without Apple3 family support, and the BC1..BC7 range without Mac2 family
// support.
bool QRhiMetal::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture::Flags flags) const
{
    Q_UNUSED(flags);

#ifdef Q_OS_MACOS
    // Mac2 (needed for BC* formats) is taken for granted on macOS, Apple3
    // only when running on an Apple GPU.
    const bool supportsFamilyMac2 = true;
    const bool supportsFamilyApple3 = caps.isAppleGPU ? true : false;
#else
    const bool supportsFamilyMac2 = false;
    const bool supportsFamilyApple3 = true;
#endif

    // BC5 is not available for any Apple hardware
    if (format == QRhiTexture::BC5)
        return false;

    const bool isEtc2 = format >= QRhiTexture::ETC2_RGB8 && format <= QRhiTexture::ETC2_RGBA8;
    const bool isAstc = format >= QRhiTexture::ASTC_4x4 && format <= QRhiTexture::ASTC_12x12;
    const bool isBc = format >= QRhiTexture::BC1 && format <= QRhiTexture::BC7;

    if (!supportsFamilyApple3 && (isEtc2 || isAstc))
        return false;

    if (!supportsFamilyMac2 && isBc)
        return false;

    return true;
}
774
// Answers feature queries for the Metal backend. The cases are grouped by
// outcome: unconditionally supported, unconditionally unsupported, and
// features that depend on the capabilities determined at create() time.
bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
{
    switch (feature) {
    // always available
    case QRhi::MultisampleTexture:
    case QRhi::MultisampleRenderBuffer:
    case QRhi::DebugMarkers:
    case QRhi::Timestamps:
    case QRhi::Instancing:
    case QRhi::CustomInstanceStepRate:
    case QRhi::PrimitiveRestart:
    case QRhi::NonDynamicUniformBuffers:
    case QRhi::NPOTTextureRepeat:
    case QRhi::RedOrAlpha8IsRed:
    case QRhi::ElementIndexUint:
    case QRhi::Compute:
    case QRhi::VertexShaderPointSize:
    case QRhi::ReadBackNonUniformBuffer:
    case QRhi::ReadBackNonBaseMipLevel:
    case QRhi::TexelFetch:
    case QRhi::RenderToNonBaseMipLevel:
    case QRhi::IntAttributes:
    case QRhi::ScreenSpaceDerivatives:
    case QRhi::ReadBackAnyTextureFormat:
    case QRhi::PipelineCacheDataLoadSave:
    case QRhi::ImageDataStride:
    case QRhi::ThreeDimensionalTextures:
    case QRhi::RenderTo3DTextureSlice:
    case QRhi::TextureArrays:
    case QRhi::Tessellation:
    case QRhi::NonFillPolygonMode:
    case QRhi::OneDimensionalTextures:
    case QRhi::HalfAttributes:
    case QRhi::ThreeDimensionalTextureMipmaps:
    case QRhi::ResolveDepthStencil:
    case QRhi::PerRenderTargetBlending:
        return true;

    // never available
    case QRhi::NonFourAlignedEffectiveIndexBufferOffset:
    case QRhi::WideLines:
    case QRhi::TriangleFanTopology:
    case QRhi::RenderBufferImport:
    case QRhi::GeometryShader:
    case QRhi::TextureArrayRange:
    case QRhi::OneDimensionalTextureMipmaps:
    case QRhi::RenderToOneDimensionalTexture:
    case QRhi::TextureViewFormat:
    case QRhi::VariableRateShading:
    case QRhi::VariableRateShadingMapWithTexture:
        return false;

    // depends on the device / platform
    case QRhi::BaseVertex:
    case QRhi::BaseInstance:
        return caps.baseVertexAndInstance;
    case QRhi::MultiView:
        return caps.multiView;
    case QRhi::VariableRateShadingMap:
        return caps.shadingRateMap;

    default:
        Q_UNREACHABLE();
        return false;
    }
}
877
// Answers resource limit queries. Most values are fixed numbers; the maximum
// texture size and the thread group limits come from the capabilities
// determined at create() time.
int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const
{
    switch (limit) {
    case QRhi::TextureSizeMin:
        return 1;
    case QRhi::TextureSizeMax:
        return caps.maxTextureSize;
    case QRhi::MaxColorAttachments:
        return 8;
    case QRhi::FramesInFlight:
    case QRhi::MaxAsyncReadbackFrames:
        return QMTL_FRAMES_IN_FLIGHT;
    case QRhi::MaxThreadGroupsPerDimension:
        return 65535;
    // a single value is used for the total and for each dimension
    case QRhi::MaxThreadsPerThreadGroup:
    case QRhi::MaxThreadGroupX:
    case QRhi::MaxThreadGroupY:
    case QRhi::MaxThreadGroupZ:
        return caps.maxThreadGroupSize;
    case QRhi::TextureArraySizeMax:
        return 2048;
    case QRhi::MaxUniformBufferRange:
        return 65536;
    case QRhi::MaxVertexInputs:
        return 31;
    case QRhi::MaxVertexOutputs:
        return 15; // use the minimum from MTLGPUFamily1/2/3
    case QRhi::ShadingRateImageTileSize:
        return 0;
    default:
        Q_UNREACHABLE();
        return 0;
    }
}
916
// Exposes the native handles struct that create() fills with the device and
// command queue. The returned pointer refers to a member of this object.
const QRhiNativeHandles *QRhiMetal::nativeHandles()
{
    const QRhiNativeHandles *handles = &nativeHandlesStruct;
    return handles;
}
921
// Returns a copy of the driver info (device name and type) gathered in
// create().
QRhiDriverInfo QRhiMetal::driverInfo() const
{
    const QRhiDriverInfo &info = driverInfoStruct;
    return info;
}
926
// Builds a statistics record. Only the total pipeline creation time is
// filled in; every other field keeps its default value.
QRhiStats QRhiMetal::statistics()
{
    QRhiStats stats;
    stats.totalPipelineCreationTime = totalPipelineCreationTime();
    return stats;
}
933
// Not applicable to this backend; always reports failure.
bool QRhiMetal::makeThreadLocalNativeContextCurrent()
{
    const bool madeCurrent = false;
    return madeCurrent;
}
939
// Not applicable to this backend; the argument is ignored and nothing
// happens.
void QRhiMetal::setQueueSubmitParams(QRhiNativeHandles *)
{
}
944
// Destroys every entry of the shader cache and then empties the cache.
void QRhiMetal::releaseCachedResources()
{
    for (QMetalShader &cachedShader : d->shaderCache)
        cachedShader.destroy();

    d->shaderCache.clear();
}
952
// This backend never reports a lost device.
bool QRhiMetal::isDeviceLost() const
{
    const bool lost = false;
    return lost;
}
957
// Fixed-size header placed in front of the serialized MTLBinaryArchive in
// pipeline cache blobs. It is copied to and from raw bytes with memcpy, so
// the member order and sizes make up the blob format.
struct QMetalPipelineCacheDataHeader
{
    quint32 rhiId;    // value of pipelineCacheRhiId() at save time
    quint32 arch;     // sizeof(void*) of the process that saved the blob
    quint32 dataSize; // number of archive bytes following the header
    quint32 osMajor;  // OS major version at save time
    quint32 osMinor;  // OS minor version at save time
    char driver[236]; // NUL-terminated (possibly truncated) Metal device name
};

// Both the writer and the reader of the blob rely on this exact size, so the
// layout is checked where the type is defined.
Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256);
967
// Serializes the binary archive into a blob made of a
// QMetalPipelineCacheDataHeader followed by the archive bytes. The archive
// is written to a temporary file by Metal and read back from there.
//
// Returns an empty byte array when there is no archive, when saving was not
// enabled via the QRhi flags, or when any of the file steps fails.
QByteArray QRhiMetal::pipelineCacheData()
{
    Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256);

    const bool saveEnabled = rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave);
    if (!d->binArch || !saveEnabled)
        return QByteArray();

    QTemporaryFile tmp;
    if (!tmp.open()) {
        qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal");
        return QByteArray();
    }
    tmp.close(); // the file exists until the tmp dtor runs

    const QString archivePath = QFileInfo(tmp.fileName()).absoluteFilePath();
    NSURL *archiveUrl = QUrl::fromLocalFile(archivePath).toNSURL();
    NSError *err = nil;
    const bool serialized = [d->binArch serializeToURL: archiveUrl error: &err];
    if (!serialized) {
        const QString msg = QString::fromNSString(err.localizedDescription);
        // Some of these "errors" are not actual errors. (think of "Nothing to serialize")
        qCDebug(QRHI_LOG_INFO, "Failed to serialize MTLBinaryArchive: %s", qPrintable(msg));
        return QByteArray();
    }

    QFile archiveFile(archivePath);
    if (!archiveFile.open(QIODevice::ReadOnly)) {
        qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to reopen temporary file");
        return QByteArray();
    }
    const QByteArray blob = archiveFile.readAll();
    archiveFile.close();

    const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
    const quint32 dataSize = quint32(blob.size());

    // Zero-initialized, so unused bytes of the driver field are all NUL.
    QMetalPipelineCacheDataHeader header = {};
    header.rhiId = pipelineCacheRhiId();
    header.arch = quint32(sizeof(void*));
    header.dataSize = dataSize;
    header.osMajor = osMajor;
    header.osMinor = osMinor;

    // The device name is cut off if needed to leave room for the terminator.
    const QByteArray &deviceName = driverInfoStruct.deviceName;
    const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(deviceName.length()));
    if (driverStrLen)
        memcpy(header.driver, deviceName.constData(), driverStrLen);
    header.driver[driverStrLen] = '\0';

    QByteArray data;
    data.resize(headerSize + dataSize);
    char *out = data.data();
    memcpy(out, &header, headerSize);
    memcpy(out + headerSize, blob.constData(), dataSize);
    return data;
}
1020
// Seeds the binary archive from a blob previously produced by
// pipelineCacheData(). The blob is validated first (size, QRhi id, pointer
// size, OS version, device name) and silently ignored, apart from a debug
// message, when anything does not match. On success the payload is written
// to a temporary file and handed to setupBinaryArchive().
void QRhiMetal::setPipelineCacheData(const QByteArray &data)
{
    if (data.isEmpty())
        return;

    const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
    if (data.size() < qsizetype(headerSize)) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)");
        return;
    }

    const size_t dataOffset = headerSize;
    QMetalPipelineCacheDataHeader header;
    memcpy(&header, data.constData(), headerSize);

    const quint32 rhiId = pipelineCacheRhiId();
    if (header.rhiId != rhiId) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)",
                rhiId, header.rhiId);
        return;
    }

    const quint32 arch = quint32(sizeof(void*));
    if (header.arch != arch) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)",
                arch, header.arch);
        return;
    }

    if (header.osMajor != osMajor || header.osMinor != osMinor) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OS version does not match (%u.%u, %u.%u)",
                osMajor, osMinor, header.osMajor, header.osMinor);
        return;
    }

    // The stored name must be exactly the (possibly truncated) current device
    // name. Comparing just the first driverStrLen characters would accept a
    // stored name that merely starts with the current one, hence the extra
    // check that the stored string ends where the current one does.
    // driverStrLen is at most sizeof(header.driver) - 1, so the index is valid.
    const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length()));
    if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)
        || header.driver[driverStrLen] != '\0') {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Metal device name does not match");
        return;
    }

    if (data.size() < qsizetype(dataOffset + header.dataSize)) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)");
        return;
    }

    const char *p = data.constData() + dataOffset;

    QTemporaryFile tmp;
    if (!tmp.open()) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Failed to create temporary file for Metal");
        return;
    }
    // Do not hand a truncated file to Metal if the payload could not be
    // written out completely.
    if (tmp.write(p, header.dataSize) != qint64(header.dataSize) || !tmp.flush()) {
        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Failed to write temporary file for Metal");
        return;
    }
    tmp.close(); // the file exists until the tmp dtor runs

    const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath();
    NSURL *url = QUrl::fromLocalFile(fn).toNSURL();
    if (d->setupBinaryArchive(url))
        qCDebug(QRHI_LOG_INFO, "Created MTLBinaryArchive with initial data of %u bytes", header.dataSize);
}
1082
// Factory for the Metal renderbuffer implementation; all arguments are
// forwarded unchanged to the QMetalRenderBuffer constructor.
QRhiRenderBuffer *QRhiMetal::createRenderBuffer(QRhiRenderBuffer::Type type,
                                                const QSize &pixelSize,
                                                int sampleCount,
                                                QRhiRenderBuffer::Flags flags,
                                                QRhiTexture::Format backingFormatHint)
{
    QMetalRenderBuffer *renderBuffer =
            new QMetalRenderBuffer(this, type, pixelSize, sampleCount, flags, backingFormatHint);
    return renderBuffer;
}
1089
// Factory for the Metal texture implementation; all arguments are forwarded
// unchanged to the QMetalTexture constructor.
QRhiTexture *QRhiMetal::createTexture(QRhiTexture::Format format,
                                      const QSize &pixelSize,
                                      int depth,
                                      int arraySize,
                                      int sampleCount,
                                      QRhiTexture::Flags flags)
{
    QMetalTexture *texture =
            new QMetalTexture(this, format, pixelSize, depth, arraySize, sampleCount, flags);
    return texture;
}
1096
// Factory for the Metal sampler implementation; the filter and address mode
// arguments are forwarded unchanged to the QMetalSampler constructor.
QRhiSampler *QRhiMetal::createSampler(QRhiSampler::Filter magFilter,
                                      QRhiSampler::Filter minFilter,
                                      QRhiSampler::Filter mipmapMode,
                                      QRhiSampler::AddressMode u,
                                      QRhiSampler::AddressMode v,
                                      QRhiSampler::AddressMode w)
{
    QMetalSampler *sampler = new QMetalSampler(this, magFilter, minFilter, mipmapMode, u, v, w);
    return sampler;
}
1103
// Factory for the Metal shading rate map implementation.
QRhiShadingRateMap *QRhiMetal::createShadingRateMap()
{
    QMetalShadingRateMap *rateMap = new QMetalShadingRateMap(this);
    return rateMap;
}
1108
// Factory for the Metal texture render target implementation; \a desc and
// \a flags are forwarded unchanged to the constructor.
QRhiTextureRenderTarget *QRhiMetal::createTextureRenderTarget(
        const QRhiTextureRenderTargetDescription &desc,
        QRhiTextureRenderTarget::Flags flags)
{
    QMetalTextureRenderTarget *renderTarget = new QMetalTextureRenderTarget(this, desc, flags);
    return renderTarget;
}
1114
// Factory for the Metal graphics pipeline implementation.
QRhiGraphicsPipeline *QRhiMetal::createGraphicsPipeline()
{
    QMetalGraphicsPipeline *pipeline = new QMetalGraphicsPipeline(this);
    return pipeline;
}
1119
// Factory for the Metal compute pipeline implementation.
QRhiComputePipeline *QRhiMetal::createComputePipeline()
{
    QMetalComputePipeline *pipeline = new QMetalComputePipeline(this);
    return pipeline;
}
1124
// Factory for the Metal shader resource bindings implementation.
QRhiShaderResourceBindings *QRhiMetal::createShaderResourceBindings()
{
    QMetalShaderResourceBindings *bindings = new QMetalShaderResourceBindings(this);
    return bindings;
}
1129
// Kind of native binding point being looked up by mapBinding(). Sampler
// selects the second value of a native binding map entry; the other kinds
// select the first.
enum class BindingType { Buffer, Texture, Sampler };
1135
// Translates the QRhi-level \a binding to the native binding point for the
// shader stage \a stageIndex, using that stage's entry in
// \a nativeResourceBindingMaps.
//
// Returns \a binding unchanged when the stage has no (or an empty) map, the
// mapped value when an entry exists (which may be -1 for an inactive
// resource), and -1 when the map has no entry for \a binding.
static inline int mapBinding(int binding,
                             int stageIndex,
                             const QShader::NativeResourceBindingMap *nativeResourceBindingMaps[],
                             BindingType type)
{
    const QShader::NativeResourceBindingMap *stageMap = nativeResourceBindingMaps[stageIndex];

    // old QShader versions do not have this map, assume 1:1 mapping then
    if (!stageMap || stageMap->isEmpty())
        return binding;

    const auto entry = stageMap->constFind(binding);

    // Hitting this path is normal too. It is not given that the resource (for
    // example, a uniform block) is present in the shaders for all the stages
    // specified by the visibility mask in the QRhiShaderResourceBinding.
    if (entry == stageMap->cend())
        return -1;

    // Samplers use the second value of the pair, everything else the first.
    // Either may be -1, if the resource is inactive.
    if (type == BindingType::Sampler)
        return entry->second;
    return entry->first;
}
1154
// Sets one batch of buffers, together with the matching offsets, on the
// encoder that belongs to \a stage: the render pass encoder for vertex and
// fragment, the compute pass encoder for compute. Nothing is bound for the
// tessellation stages.
static inline void bindStageBuffers(QMetalCommandBuffer *cbD,
                                    int stage,
                                    const QRhiBatchedBindings<id<MTLBuffer>>::Batch &bufferBatch,
                                    const QRhiBatchedBindings<NSUInteger>::Batch &offsetBatch)
{
    // The range covers as many consecutive slots as there are buffers in the
    // batch, starting at the batch's first binding.
    const NSRange slots = NSMakeRange(bufferBatch.startBinding,
                                      NSUInteger(bufferBatch.resources.count()));
    const auto buffers = bufferBatch.resources.constData();
    const auto offsets = offsetBatch.resources.constData();

    switch (stage) {
    case QMetalShaderResourceBindingsData::VERTEX:
        [cbD->d->currentRenderPassEncoder setVertexBuffers: buffers
                                                   offsets: offsets
                                                 withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::FRAGMENT:
        [cbD->d->currentRenderPassEncoder setFragmentBuffers: buffers
                                                     offsets: offsets
                                                   withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::COMPUTE:
        [cbD->d->currentComputePassEncoder setBuffers: buffers
                                              offsets: offsets
                                            withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::TESSCTRL:
    case QMetalShaderResourceBindingsData::TESSEVAL:
        // do nothing. These are used later for tessellation
        break;
    default:
        Q_UNREACHABLE();
        break;
    }
}
1185
// Sets one batch of textures on the encoder that belongs to \a stage: the
// render pass encoder for vertex and fragment, the compute pass encoder for
// compute. Nothing is bound for the tessellation stages.
static inline void bindStageTextures(QMetalCommandBuffer *cbD,
                                     int stage,
                                     const QRhiBatchedBindings<id<MTLTexture>>::Batch &textureBatch)
{
    // Consecutive slots starting at the batch's first binding.
    const NSRange slots = NSMakeRange(textureBatch.startBinding,
                                      NSUInteger(textureBatch.resources.count()));
    const auto textures = textureBatch.resources.constData();

    switch (stage) {
    case QMetalShaderResourceBindingsData::VERTEX:
        [cbD->d->currentRenderPassEncoder setVertexTextures: textures withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::FRAGMENT:
        [cbD->d->currentRenderPassEncoder setFragmentTextures: textures withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::COMPUTE:
        [cbD->d->currentComputePassEncoder setTextures: textures withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::TESSCTRL:
    case QMetalShaderResourceBindingsData::TESSEVAL:
        // do nothing. These are used later for tessellation
        break;
    default:
        Q_UNREACHABLE();
        break;
    }
}
1212
// Sets one batch of sampler states on the encoder that belongs to
// \a encoderStage: the render pass encoder for vertex and fragment, the
// compute pass encoder for compute. Nothing is bound for the tessellation
// stages.
static inline void bindStageSamplers(QMetalCommandBuffer *cbD,
                                     int encoderStage,
                                     const QRhiBatchedBindings<id<MTLSamplerState>>::Batch &samplerBatch)
{
    // Consecutive slots starting at the batch's first binding.
    const NSRange slots = NSMakeRange(samplerBatch.startBinding,
                                      NSUInteger(samplerBatch.resources.count()));
    const auto samplers = samplerBatch.resources.constData();

    switch (encoderStage) {
    case QMetalShaderResourceBindingsData::VERTEX:
        [cbD->d->currentRenderPassEncoder setVertexSamplerStates: samplers withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::FRAGMENT:
        [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: samplers withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::COMPUTE:
        [cbD->d->currentComputePassEncoder setSamplerStates: samplers withRange: slots];
        break;
    case QMetalShaderResourceBindingsData::TESSCTRL:
    case QMetalShaderResourceBindingsData::TESSEVAL:
        // do nothing. These are used later for tessellation
        break;
    default:
        Q_UNREACHABLE();
        break;
    }
}
1239
// Helper that is not used during the common vertex+fragment and compute
// pipelines, but is necessary when tessellation is involved and so the
// graphics pipeline is under the hood a combination of multiple compute and
// render pipelines. We need to be able to set the buffers, textures and
// samplers again when switching between render and compute encoders.
//
// The batches recorded for \a resourceStage are bound to the encoder of
// \a encoderStage. They are taken from \a customBindingState when given,
// otherwise from the command buffer's current binding state.
static inline void rebindShaderResources(QMetalCommandBuffer *cbD, int resourceStage, int encoderStage,
                                         const QMetalShaderResourceBindingsData *customBindingState = nullptr)
{
    const QMetalShaderResourceBindingsData *bindingData = customBindingState;
    if (!bindingData)
        bindingData = &cbD->d->currentShaderResourceBindingState;

    const auto &stageRes = bindingData->res[resourceStage];

    // Buffer batches and their offset batches are walked in lockstep.
    const int bufferBatchCount = stageRes.bufferBatches.batches.count();
    for (int idx = 0; idx != bufferBatchCount; ++idx) {
        bindStageBuffers(cbD, encoderStage,
                         stageRes.bufferBatches.batches[idx],
                         stageRes.bufferOffsetBatches.batches[idx]);
    }

    const int textureBatchCount = stageRes.textureBatches.batches.count();
    for (int idx = 0; idx != textureBatchCount; ++idx)
        bindStageTextures(cbD, encoderStage, stageRes.textureBatches.batches[idx]);

    const int samplerBatchCount = stageRes.samplerBatches.batches.count();
    for (int idx = 0; idx != samplerBatchCount; ++idx)
        bindStageSamplers(cbD, encoderStage, stageRes.samplerBatches.batches[idx]);
}
1266
// Translates a backend stage index (QMetalShaderResourceBindingsData::VERTEX
// etc.) to the matching QRhiShaderResourceBinding stage flag. Any other value
// is a programming error.
static inline QRhiShaderResourceBinding::StageFlag toRhiSrbStage(int stage)
{
    using Flag = QRhiShaderResourceBinding::StageFlag;

    if (stage == QMetalShaderResourceBindingsData::VERTEX)
        return Flag::VertexStage;
    if (stage == QMetalShaderResourceBindingsData::TESSCTRL)
        return Flag::TessellationControlStage;
    if (stage == QMetalShaderResourceBindingsData::TESSEVAL)
        return Flag::TessellationEvaluationStage;
    if (stage == QMetalShaderResourceBindingsData::FRAGMENT)
        return Flag::FragmentStage;
    if (stage == QMetalShaderResourceBindingsData::COMPUTE)
        return Flag::ComputeStage;

    Q_UNREACHABLE_RETURN(Flag::VertexStage);
}
1284
// Translates the bindings of srbD into native Metal buffer/texture/sampler
// bindings and issues the set* calls on the active encoder.
//
// Phase 1 walks the srb and collects, per stage, the (native binding,
// resource) pairs. Uniform buffer offsets are overridden by a matching entry
// in dynamicOffsets, if there is one.
// Phase 2 sorts the collected data by native binding, groups it into batches
// and binds every batch that differs from what the command buffer has
// recorded as currently bound. When offsetOnlyChange is true only buffers are
// processed. At the end the collected data becomes the new current state.
void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD,
                                              QMetalCommandBuffer *cbD,
                                              int dynamicOffsetCount,
                                              const QRhiCommandBuffer::DynamicOffset *dynamicOffsets,
                                              bool offsetOnlyChange,
                                              const QShader::NativeResourceBindingMap *nativeResourceBindingMaps[SUPPORTED_STAGES])
{
    QMetalShaderResourceBindingsData pending;

    for (const QRhiShaderResourceBinding &entry : std::as_const(srbD->sortedBindings)) {
        const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(entry);
        switch (b->type) {
        case QRhiShaderResourceBinding::UniformBuffer:
        {
            QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.ubuf.buf);
            const int slot = bufD->d->slotted ? currentFrameSlot : 0;
            id<MTLBuffer> mtlbuf = bufD->d->buf[slot];

            // the first dynamic offset given for this binding wins over the static one
            quint32 offset = b->u.ubuf.offset;
            for (int k = 0; k < dynamicOffsetCount; ++k) {
                if (dynamicOffsets[k].first == b->binding) {
                    offset = dynamicOffsets[k].second;
                    break;
                }
            }

            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
                if (!b->stage.testFlag(toRhiSrbStage(stage)))
                    continue;
                const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
                if (nativeBinding < 0)
                    continue;
                pending.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
            }
        }
            break;
        case QRhiShaderResourceBinding::SampledTexture:
        case QRhiShaderResourceBinding::Texture:
        case QRhiShaderResourceBinding::Sampler:
        {
            const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData &stex = b->u.stex;
            for (int elem = 0; elem < stex.count; ++elem) {
                QMetalTexture *texD = QRHI_RES(QMetalTexture, stex.texSamplers[elem].tex);
                QMetalSampler *samplerD = QRHI_RES(QMetalSampler, stex.texSamplers[elem].sampler);

                for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
                    if (!b->stage.testFlag(toRhiSrbStage(stage)))
                        continue;

                    // Three cases to cover (combined, separate, separate):
                    //   texture + sampler: first = texture binding, second = sampler binding
                    //   texture only:      first = texture binding
                    //   sampler only:      first = sampler binding, hence looked up
                    //                      with BindingType::Texture
                    const int textureBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
                    int samplerBinding = -1;
                    if (samplerD) {
                        const BindingType samplerLookup = texD ? BindingType::Sampler : BindingType::Texture;
                        samplerBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, samplerLookup);
                    }

                    if (texD && textureBinding >= 0)
                        pending.res[stage].textures.append({ textureBinding + elem, texD->d->tex });
                    if (samplerBinding >= 0)
                        pending.res[stage].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
                }
            }
        }
            break;
        case QRhiShaderResourceBinding::ImageLoad:
        case QRhiShaderResourceBinding::ImageStore:
        case QRhiShaderResourceBinding::ImageLoadStore:
        {
            QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex);
            id<MTLTexture> levelView = texD->d->viewForLevel(b->u.simage.level);

            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
                if (!b->stage.testFlag(toRhiSrbStage(stage)))
                    continue;
                const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
                if (nativeBinding < 0)
                    continue;
                pending.res[stage].textures.append({ nativeBinding, levelView });
            }
        }
            break;
        case QRhiShaderResourceBinding::BufferLoad:
        case QRhiShaderResourceBinding::BufferStore:
        case QRhiShaderResourceBinding::BufferLoadStore:
        {
            QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
            // storage buffers always use the first native buffer
            id<MTLBuffer> mtlbuf = bufD->d->buf[0];
            const quint32 offset = b->u.sbuf.offset;

            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
                if (!b->stage.testFlag(toRhiSrbStage(stage)))
                    continue;
                const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
                if (nativeBinding < 0)
                    continue;
                pending.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
            }
        }
            break;
        default:
            Q_UNREACHABLE();
            break;
        }
    }

    // shared ordering for the buffer, texture and sampler lists
    const auto byNativeBinding = [](const auto &lhs, const auto &rhs) {
        return lhs.nativeBinding < rhs.nativeBinding;
    };

    for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
        const bool isRenderStage = stage == QMetalShaderResourceBindingsData::VERTEX
                || stage == QMetalShaderResourceBindingsData::FRAGMENT
                || stage == QMetalShaderResourceBindingsData::TESSCTRL
                || stage == QMetalShaderResourceBindingsData::TESSEVAL;
        if (isRenderStage && cbD->recordingPass != QMetalCommandBuffer::RenderPass)
            continue;
        if (stage == QMetalShaderResourceBindingsData::COMPUTE
            && cbD->recordingPass != QMetalCommandBuffer::ComputePass)
            continue;

        auto &fresh = pending.res[stage];
        const auto &bound = cbD->d->currentShaderResourceBindingState.res[stage];

        // QRhiBatchedBindings operates on native bindings and wants sorted
        // input. The srb's own ordering follows the QRhi (SPIR-V) binding
        // numbers, which does not help, hence the sorting on every call.
        std::sort(fresh.buffers.begin(), fresh.buffers.end(), byNativeBinding);

        for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : std::as_const(fresh.buffers)) {
            fresh.bufferBatches.feed(buf.nativeBinding, buf.mtlbuf);
            fresh.bufferOffsetBatches.feed(buf.nativeBinding, buf.offset);
        }
        fresh.bufferBatches.finish();
        fresh.bufferOffsetBatches.finish();

        const int bufferBatchCount = fresh.bufferBatches.batches.count();
        for (int idx = 0; idx < bufferBatchCount; ++idx) {
            const auto &bufferBatch = fresh.bufferBatches.batches[idx];
            const auto &offsetBatch = fresh.bufferOffsetBatches.batches[idx];
            // no need to touch the encoder when the same batch is bound already
            const bool alreadyBound = bound.bufferBatches.batches.count() > idx
                    && bound.bufferOffsetBatches.batches.count() > idx
                    && bufferBatch == bound.bufferBatches.batches[idx]
                    && offsetBatch == bound.bufferOffsetBatches.batches[idx];
            if (!alreadyBound)
                bindStageBuffers(cbD, stage, bufferBatch, offsetBatch);
        }

        if (offsetOnlyChange)
            continue;

        std::sort(fresh.textures.begin(), fresh.textures.end(), byNativeBinding);
        std::sort(fresh.samplers.begin(), fresh.samplers.end(), byNativeBinding);

        for (const QMetalShaderResourceBindingsData::Stage::Texture &tex : std::as_const(fresh.textures))
            fresh.textureBatches.feed(tex.nativeBinding, tex.mtltex);

        for (const QMetalShaderResourceBindingsData::Stage::Sampler &smp : std::as_const(fresh.samplers))
            fresh.samplerBatches.feed(smp.nativeBinding, smp.mtlsampler);

        fresh.textureBatches.finish();
        fresh.samplerBatches.finish();

        const int textureBatchCount = fresh.textureBatches.batches.count();
        for (int idx = 0; idx < textureBatchCount; ++idx) {
            const auto &textureBatch = fresh.textureBatches.batches[idx];
            const bool alreadyBound = bound.textureBatches.batches.count() > idx
                    && textureBatch == bound.textureBatches.batches[idx];
            if (!alreadyBound)
                bindStageTextures(cbD, stage, textureBatch);
        }

        const int samplerBatchCount = fresh.samplerBatches.batches.count();
        for (int idx = 0; idx < samplerBatchCount; ++idx) {
            const auto &samplerBatch = fresh.samplerBatches.batches[idx];
            const bool alreadyBound = bound.samplerBatches.batches.count() > idx
                    && samplerBatch == bound.samplerBatches.batches[idx];
            if (!alreadyBound)
                bindStageSamplers(cbD, stage, samplerBatch);
        }
    }

    cbD->d->currentShaderResourceBindingState = pending;
}
1467
// Makes this pipeline current on the command buffer's render pass encoder.
// The render pipeline state is always set; depth-stencil state, cull mode,
// fill mode, winding and depth bias are only sent when they differ from the
// values cached on the command buffer (-1 means "nothing set yet" for the
// integer caches).
void QMetalGraphicsPipeline::makeActiveForCurrentRenderPassEncoder(QMetalCommandBuffer *cbD)
{
    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;

    [encoder setRenderPipelineState: d->ps];

    if (cbD->d->currentDepthStencilState != d->ds) {
        [encoder setDepthStencilState: d->ds];
        cbD->d->currentDepthStencilState = d->ds;
    }

    const bool cullModeStale = cbD->currentCullMode == -1
            || d->cullMode != uint(cbD->currentCullMode);
    if (cullModeStale) {
        [encoder setCullMode: d->cullMode];
        cbD->currentCullMode = int(d->cullMode);
    }

    const bool fillModeStale = cbD->currentTriangleFillMode == -1
            || d->triangleFillMode != uint(cbD->currentTriangleFillMode);
    if (fillModeStale) {
        [encoder setTriangleFillMode: d->triangleFillMode];
        cbD->currentTriangleFillMode = int(d->triangleFillMode);
    }

    const bool windingStale = cbD->currentFrontFaceWinding == -1
            || d->winding != uint(cbD->currentFrontFaceWinding);
    if (windingStale) {
        [encoder setFrontFacingWinding: d->winding];
        cbD->currentFrontFaceWinding = int(d->winding);
    }

    const bool depthBiasUpToDate = qFuzzyCompare(d->depthBias, cbD->currentDepthBiasValues.first)
            && qFuzzyCompare(d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second);
    if (!depthBiasUpToDate) {
        [encoder setDepthBias: d->depthBias
                   slopeScale: d->slopeScaledDepthBias
                        clamp: 0.0f];
        cbD->currentDepthBiasValues = { d->depthBias, d->slopeScaledDepthBias };
    }
}
1498
// Records ps as the current graphics pipeline of the render pass. Does
// nothing when the same pipeline (same generation) is current already.
// Pipelines with tessellation enabled (or where setting it up failed) are not
// activated on the encoder here.
void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
    QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps);

    const bool alreadyCurrent = cbD->currentGraphicsPipeline == psD
            && cbD->currentPipelineGeneration == psD->generation;
    if (alreadyCurrent)
        return;

    cbD->currentGraphicsPipeline = psD;
    cbD->currentComputePipeline = nullptr;
    cbD->currentPipelineGeneration = psD->generation;

    const bool plainRenderPipeline = !psD->d->tess.enabled && !psD->d->tess.failed;
    if (plainRenderPipeline)
        psD->makeActiveForCurrentRenderPassEncoder(cbD);

    // Work buffers last used in the current frame slot can be handed out
    // again, so flag them as reusable.
    // NOTE: the lists are usually empty unless tessellation or multiview is used.
    const auto markReusable = [this](const auto &workBuffers) {
        for (QMetalBuffer *workBuf : workBuffers) {
            if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot)
                workBuf->lastActiveFrameSlot = -1;
        }
    };
    markReusable(psD->d->extraBufMgr.deviceLocalWorkBuffers);
    markReusable(psD->d->extraBufMgr.hostVisibleWorkBuffers);

    psD->lastActiveFrameSlot = currentFrameSlot;
}
1528
// Makes srb the active set of shader resources for the current graphics or
// compute pipeline of cb. When srb is null, the srb associated with the
// current pipeline is used instead.
//
// The function
//  - executes the pending host writes of every referenced uniform/storage
//    buffer and marks all referenced resources as used in currentFrameSlot,
//  - compares resource ids and generations with the values stored in the
//    srb's boundResourceData to find out if anything needs rebinding,
//  - when the pipeline has a bufferSizeBuffer, fills it with the storage
//    buffer sizes gathered from the srb,
//  - finally calls enqueueShaderResourceBindings() if a rebind is needed.
//
// dynamicOffsets (dynamicOffsetCount entries) is passed on unchanged to
// enqueueShaderResourceBindings().
1529 void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBindings *srb,
1530 int dynamicOffsetCount,
1531 const QRhiCommandBuffer::DynamicOffset *dynamicOffsets)
1532 {
1533 QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
1534 Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass);
1535 QMetalGraphicsPipeline *gfxPsD = cbD->currentGraphicsPipeline;
1536 QMetalComputePipeline *compPsD = cbD->currentComputePipeline;
1537
// NOTE(review): compPsD is dereferenced whenever gfxPsD is null, so this
// presumably relies on a pipeline having been set beforehand - confirm.
1538 if (!srb) {
1539 if (gfxPsD)
1540 srb = gfxPsD->m_shaderResourceBindings;
1541 else
1542 srb = compPsD->m_shaderResourceBindings;
1543 }
1544
1545 QMetalShaderResourceBindings *srbD = QRHI_RES(QMetalShaderResourceBindings, srb);
1546 bool hasSlottedResourceInSrb = false;
1547 bool hasDynamicOffsetInSrb = false;
1548 bool resNeedsRebind = false;
1549
1550 // SPIRV-Cross buffer size buffers
1551 // Need to determine storage buffer sizes here as this is the last opportunity for storage
1552 // buffer bindings (offset, size) to be specified before draw / dispatch call
1553 const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer);
// stage flag -> (srb binding number -> storage buffer size)
1554 QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes;
1555
1556 // do buffer writes, figure out if we need to rebind, and mark as in-use
1557 for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) {
1558 const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings.at(i));
1559 QMetalShaderResourceBindings::BoundResourceData &bd(srbD->boundResourceData[i]);
1560 switch (b->type) {
1561 case QRhiShaderResourceBinding::UniformBuffer:
1562 {
1563 QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.ubuf.buf);
1564 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer));
1565 executeBufferHostWritesForCurrentFrame(bufD);
1566 if (bufD->d->slotted)
1567 hasSlottedResourceInSrb = true;
1568 if (b->u.ubuf.hasDynamicOffset)
1569 hasDynamicOffsetInSrb = true;
1570 if (bufD->generation != bd.ubuf.generation || bufD->m_id != bd.ubuf.id) {
1571 resNeedsRebind = true;
1572 bd.ubuf.id = bufD->m_id;
1573 bd.ubuf.generation = bufD->generation;
1574 }
1575 bufD->lastActiveFrameSlot = currentFrameSlot;
1576 }
1577 break;
1578 case QRhiShaderResourceBinding::SampledTexture:
1579 case QRhiShaderResourceBinding::Texture:
1580 case QRhiShaderResourceBinding::Sampler:
1581 {
1582 const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData *data = &b->u.stex;
1583 if (bd.stex.count != data->count) {
1584 bd.stex.count = data->count;
1585 resNeedsRebind = true;
1586 }
1587 for (int elem = 0; elem < data->count; ++elem) {
1588 QMetalTexture *texD = QRHI_RES(QMetalTexture, data->texSamplers[elem].tex);
1589 QMetalSampler *samplerD = QRHI_RES(QMetalSampler, data->texSamplers[elem].sampler);
1590 Q_ASSERT(texD || samplerD);
// a missing texture or sampler is tracked with id 0 and generation 0
1591 const quint64 texId = texD ? texD->m_id : 0;
1592 const uint texGen = texD ? texD->generation : 0;
1593 const quint64 samplerId = samplerD ? samplerD->m_id : 0;
1594 const uint samplerGen = samplerD ? samplerD->generation : 0;
1595 if (texGen != bd.stex.d[elem].texGeneration
1596 || texId != bd.stex.d[elem].texId
1597 || samplerGen != bd.stex.d[elem].samplerGeneration
1598 || samplerId != bd.stex.d[elem].samplerId)
1599 {
1600 resNeedsRebind = true;
1601 bd.stex.d[elem].texId = texId;
1602 bd.stex.d[elem].texGeneration = texGen;
1603 bd.stex.d[elem].samplerId = samplerId;
1604 bd.stex.d[elem].samplerGeneration = samplerGen;
1605 }
1606 if (texD)
1607 texD->lastActiveFrameSlot = currentFrameSlot;
1608 if (samplerD)
1609 samplerD->lastActiveFrameSlot = currentFrameSlot;
1610 }
1611 }
1612 break;
1613 case QRhiShaderResourceBinding::ImageLoad:
1614 case QRhiShaderResourceBinding::ImageStore:
1615 case QRhiShaderResourceBinding::ImageLoadStore:
1616 {
1617 QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex);
1618 if (texD->generation != bd.simage.generation || texD->m_id != bd.simage.id) {
1619 resNeedsRebind = true;
1620 bd.simage.id = texD->m_id;
1621 bd.simage.generation = texD->generation;
1622 }
1623 texD->lastActiveFrameSlot = currentFrameSlot;
1624 }
1625 break;
1626 case QRhiShaderResourceBinding::BufferLoad:
1627 case QRhiShaderResourceBinding::BufferStore:
1628 case QRhiShaderResourceBinding::BufferLoadStore:
1629 {
1630 QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
1631 Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
1632
1633 if (needsBufferSizeBuffer) {
// NOTE(review): this 'i' shadows the index of the enclosing loop over
// sortedBindings. The loop tests the six single-bit flag values
// 1 << 0 .. 1 << 5 and records the buffer size for each stage in
// which the binding is visible; maybeSize is used when non-zero,
// the full buffer size otherwise.
1634 for (int i = 0; i < 6; ++i) {
1635 const QRhiShaderResourceBinding::StageFlag stage =
1636 QRhiShaderResourceBinding::StageFlag(1 << i);
1637 if (b->stage.testFlag(stage)) {
1638 storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size();
1639 }
1640 }
1641 }
1642
1643 executeBufferHostWritesForCurrentFrame(bufD);
1644 if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) {
1645 resNeedsRebind = true;
1646 bd.sbuf.id = bufD->m_id;
1647 bd.sbuf.generation = bufD->generation;
1648 }
1649 bufD->lastActiveFrameSlot = currentFrameSlot;
1650 }
1651 break;
1652 default:
1653 Q_UNREACHABLE();
1654 break;
1655 }
1656 }
1657
1658 if (needsBufferSizeBuffer) {
1659 QMetalBuffer *bufD = nullptr;
// the shaders that declare a buffer size buffer binding, with their stage flag
1660 QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders;
1661
1662 if (compPsD) {
1663 bufD = compPsD->d->bufferSizeBuffer;
1664 Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
1665 shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage));
1666 } else {
1667 bufD = gfxPsD->d->bufferSizeBuffer;
1668 if (gfxPsD->d->tess.enabled) {
1669
1670 // Assumptions
1671 // * We only use one of the compute vertex shader variants in a pipeline at any one time
1672 // * The vertex shader variants all have the same storage block bindings
1673 // * The vertex shader variants all have the same native resource binding map
1674 // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement
1675 // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding
1676 // => We only need to use one vertex shader variant to generate the identical shader
1677 // resource bindings
1678 Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks());
1679 Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks());
1680 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
1681 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
1682 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
1683 == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
1684 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
1685 == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
1686 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
1687 == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
1688 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
1689 == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
1690
1691 if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
1692 shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage));
1693
1694 if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
1695 shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage));
1696
1697 if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
1698 shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage));
1699
1700 } else {
1701 if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
1702 shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage));
1703 }
1704 if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
1705 shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage));
1706 }
1707
// running byte offset into the buffer size buffer; each shader in the list
// gets its own region
1708 quint32 offset = 0;
1709 for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) {
1710
1711 const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
1712
1713 // if we don't have a srb entry for the buffer size buffer
1714 if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) {
1715
1716 int maxNativeBinding = 0;
1717 for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks())
1718 maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first);
1719
1720 const int size = (maxNativeBinding + 1) * sizeof(int);
1721
1722 Q_ASSERT(offset + size <= bufD->size());
// The srb itself is extended here: a BufferLoad binding for the buffer size
// buffer is appended together with a matching boundResourceData entry.
1723 srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size));
1724
1725 QMetalShaderResourceBindings::BoundResourceData bd;
1726 bd.sbuf.id = bufD->m_id;
1727 bd.sbuf.generation = bufD->generation;
1728 srbD->boundResourceData.append(bd);
1729 }
1730
1731 // create the buffer size buffer data
1732 QVarLengthArray<int, 8> bufferSizeBufferData;
1733 Q_ASSERT(storageBufferSizes.contains(shader.second));
1734 const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]);
1735 for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) {
1736 const int index = shader.first->nativeResourceBindingMap[block.binding].first;
1737
1738 // if the native binding is -1, the buffer is present but not accessed in the shader
1739 if (index < 0)
1740 continue;
1741
1742 if (bufferSizeBufferData.size() <= index)
1743 bufferSizeBufferData.resize(index + 1);
1744
1745 Q_ASSERT(sizes.contains(block.binding));
1746 bufferSizeBufferData[index] = sizes[block.binding];
1747 }
1748
// queue the size table as a pending update of the buffer size buffer at the
// current offset
1749 QRhiBufferData data;
1750 const quint32 size = bufferSizeBufferData.size() * sizeof(int);
1751 data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size);
1752 Q_ASSERT(offset + size <= bufD->size());
1753 bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data });
1754
1755 // buffer offsets must be 32byte aligned
1756 offset += ((size + 31) / 32) * 32;
1757 }
1758
1759 executeBufferHostWritesForCurrentFrame(bufD);
1760 bufD->lastActiveFrameSlot = currentFrameSlot;
1761 }
1762
1763 // make sure the resources for the correct slot get bound
1764 const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0;
1765 if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot)
1766 resNeedsRebind = true;
1767
1768 const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srbD) : (cbD->currentComputeSrb != srbD);
1769 const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation;
1770
1771 // dynamic uniform buffer offsets always trigger a rebind
1772 if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) {
// one native resource binding map per stage; stages not used by the current
// pipeline stay null
1773 const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr, nullptr, nullptr };
1774 if (gfxPsD) {
1775 cbD->currentGraphicsSrb = srbD;
1776 cbD->currentComputeSrb = nullptr;
1777 if (gfxPsD->d->tess.enabled) {
1778 // If tessellating, we don't know which compVs shader to use until the draw call is
1779 // made. They should all have the same native resource binding map, so pick one.
1780 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
1781 Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
1782 resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->tess.compVs[0].nativeResourceBindingMap;
1783 resBindMaps[QMetalShaderResourceBindingsData::TESSCTRL] = &gfxPsD->d->tess.compTesc.nativeResourceBindingMap;
1784 resBindMaps[QMetalShaderResourceBindingsData::TESSEVAL] = &gfxPsD->d->tess.vertTese.nativeResourceBindingMap;
1785 } else {
1786 resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->vs.nativeResourceBindingMap;
1787 }
1788 resBindMaps[QMetalShaderResourceBindingsData::FRAGMENT] = &gfxPsD->d->fs.nativeResourceBindingMap;
1789 } else {
1790 cbD->currentGraphicsSrb = nullptr;
1791 cbD->currentComputeSrb = srbD;
1792 resBindMaps[QMetalShaderResourceBindingsData::COMPUTE] = &compPsD->d->cs.nativeResourceBindingMap;
1793 }
1794 cbD->currentSrbGeneration = srbD->generation;
1795 cbD->currentResSlot = resSlot;
1796
// true when the dynamic offsets are the only reason for getting here
1797 const bool offsetOnlyChange = hasDynamicOffsetInSrb && !resNeedsRebind && !srbChanged && !srbRebuilt;
1798 enqueueShaderResourceBindings(srbD, cbD, dynamicOffsetCount, dynamicOffsets, offsetOnlyChange, resBindMaps);
1799 }
1800 }
1801
// Binds bindingCount vertex buffers (with offsets) starting at startBinding
// and records the index buffer, if any, on the command buffer. The vertex
// buffers are only sent to the encoder when they, or the first native vertex
// buffer index, differ from what was set last.
void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb,
                               int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings,
                               QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);

    QRhiBatchedBindings<id<MTLBuffer> > vertexBuffers;
    QRhiBatchedBindings<NSUInteger> vertexOffsets;
    for (int n = 0; n < bindingCount; ++n) {
        const QRhiCommandBuffer::VertexInput &input = bindings[n];
        QMetalBuffer *vbufD = QRHI_RES(QMetalBuffer, input.first);
        executeBufferHostWritesForCurrentFrame(vbufD);
        vbufD->lastActiveFrameSlot = currentFrameSlot;
        id<MTLBuffer> mtlbuf = vbufD->d->buf[vbufD->d->slotted ? currentFrameSlot : 0];
        vertexBuffers.feed(startBinding + n, mtlbuf);
        vertexOffsets.feed(startBinding + n, input.second);
    }
    vertexBuffers.finish();
    vertexOffsets.finish();

    // Vertex and constant buffers share one binding space, so the vertex
    // buffers are placed after the highest binding of the srb.
    // Nothing guarantees that setShaderResources() has been called before
    // setVertexInput(). Whatever srb ends up bound must be layout-compatible
    // with the pipeline's one though, so maxBinding is the same either way.
    QMetalShaderResourceBindings *srbD = cbD->currentGraphicsSrb
            ? cbD->currentGraphicsSrb
            : QRHI_RES(QMetalShaderResourceBindings, cbD->currentGraphicsPipeline->shaderResourceBindings());
    const int firstVertexBinding = srbD->maxBinding + 1;

    const bool inputsChanged = firstVertexBinding != cbD->d->currentFirstVertexBinding
            || vertexBuffers != cbD->d->currentVertexInputsBuffers
            || vertexOffsets != cbD->d->currentVertexInputOffsets;
    if (inputsChanged) {
        cbD->d->currentFirstVertexBinding = firstVertexBinding;
        cbD->d->currentVertexInputsBuffers = vertexBuffers;
        cbD->d->currentVertexInputOffsets = vertexOffsets;

        const int batchCount = vertexBuffers.batches.count();
        for (int idx = 0; idx < batchCount; ++idx) {
            const auto &bufferBatch = vertexBuffers.batches[idx];
            const auto &offsetBatch = vertexOffsets.batches[idx];
            const NSRange slots = NSMakeRange(uint(firstVertexBinding) + bufferBatch.startBinding,
                                              NSUInteger(bufferBatch.resources.count()));
            [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData()
                                                       offsets: offsetBatch.resources.constData()
                                                     withRange: slots];
        }
    }

    if (!indexBuf) {
        cbD->currentIndexBuffer = nullptr;
        return;
    }

    QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, indexBuf);
    executeBufferHostWritesForCurrentFrame(ibufD);
    ibufD->lastActiveFrameSlot = currentFrameSlot;
    cbD->currentIndexBuffer = ibufD;
    cbD->currentIndexOffset = indexOffset;
    cbD->currentIndexFormat = indexFormat;
}
1860
// Sets the viewport on the current render pass encoder. If the current
// pipeline does not use scissoring, a scissor rectangle matching the viewport
// (bounded to the output size) is set as well. Nothing happens when the
// viewport rectangle cannot be converted.
void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
    QSize outputSize = cbD->currentTarget->pixelSize();

    // With a shading rate map, use the output size reported by the map's
    // "screenSize" instead. The pixel size of the target will likely be
    // smaller than what gets rendered to the output, and the viewport must be
    // based on the latter to be correct. Specifically needed for visionOS.
    if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) {
        QRhiTextureRenderTarget *texRt = static_cast<QRhiTextureRenderTarget *>(cbD->currentTarget);
        QRhiShadingRateMap *srm = texRt->description().shadingRateMap();
        if (srm) {
            id<MTLRasterizationRateMap> rateMap = QRHI_RES(QMetalShadingRateMap, srm)->d->rateMap;
            if (rateMap) {
                auto screenSize = [rateMap screenSize];
                outputSize = QSize(screenSize.width, screenSize.height);
            }
        }
    }

    // MTLViewport has x,y at the top-left, QRhiViewport at the bottom-left
    float x, y, w, h;
    if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h))
        return;

    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;

    MTLViewport vp;
    vp.originX = double(x);
    vp.originY = double(y);
    vp.width = double(w);
    vp.height = double(h);
    vp.znear = double(viewport.minDepth());
    vp.zfar = double(viewport.maxDepth());
    [encoder setViewport: vp];

    QMetalGraphicsPipeline *psD = cbD->currentGraphicsPipeline;
    if (!psD || psD->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor))
        return;

    // no explicit scissor will be set for this pipeline: follow the viewport
    qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, viewport.viewport(), &x, &y, &w, &h);
    MTLScissorRect scissorRect;
    scissorRect.x = NSUInteger(x);
    scissorRect.y = NSUInteger(y);
    scissorRect.width = NSUInteger(w);
    scissorRect.height = NSUInteger(h);
    [encoder setScissorRect: scissorRect];
}
1907
// Sets the scissor rectangle on the render pass encoder of the pass being
// recorded on cb. The active graphics pipeline must have the UsesScissor flag.
void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
    Q_ASSERT(cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor));

    // x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor
    int left, top, width, height;
    const bool rectOk = qrhi_toTopLeftRenderTargetRect<Bounded>(
            cbD->currentTarget->pixelSize(), scissor.scissor(), &left, &top, &width, &height);
    if (!rectOk)
        return; // nothing is set when the conversion helper reports failure

    MTLScissorRect scissorRect;
    scissorRect.x = NSUInteger(left);
    scissorRect.y = NSUInteger(top);
    scissorRect.width = NSUInteger(width);
    scissorRect.height = NSUInteger(height);
    [cbD->d->currentRenderPassEncoder setScissorRect: scissorRect];
}
1928
// Forwards the floating point RGBA components of c as the blend color of the
// render pass encoder of the pass being recorded on cb.
void QRhiMetal::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);

    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;
    [encoder setBlendColorRed: c.redF()
                        green: c.greenF()
                         blue: c.blueF()
                        alpha: c.alphaF()];
}
1937
// Sets refValue as the stencil reference value on the render pass encoder of
// the pass being recorded on cb.
void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);

    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;
    [encoder setStencilReferenceValue: refValue];
}
1945
// Intentionally does nothing in this backend: both arguments are ignored and
// no state is recorded on the command buffer.
void QRhiMetal::setShadingRate(QRhiCommandBuffer *cb, const QSize &coarsePixelSize)
{
    Q_UNUSED(coarsePixelSize);
    Q_UNUSED(cb);
}
1951
// Returns the compute encoder used for the compute stages of tessellated
// draws, creating it from the command buffer on first use. Any render pass
// encoder that is still open is ended (and cleared) first.
static id<MTLComputeCommandEncoder> tessellationComputeEncoder(QMetalCommandBuffer *cbD)
{
    if (id<MTLRenderCommandEncoder> openRenderEncoder = cbD->d->currentRenderPassEncoder) {
        [openRenderEncoder endEncoding];
        cbD->d->currentRenderPassEncoder = nil;
    }

    id<MTLComputeCommandEncoder> encoder = cbD->d->tessellationComputeEncoder;
    if (!encoder) {
        encoder = [cbD->d->cb computeCommandEncoder];
        cbD->d->tessellationComputeEncoder = encoder;
    }
    return encoder;
}
1964
// Ends the tessellation compute encoder (if any) and opens a new render pass
// encoder from the current pass descriptor. While creating that encoder, the
// load action of every attachment whose store action is not DontCare is
// temporarily switched to Load; the descriptor's original load actions are
// put back afterwards.
static void endTessellationComputeEncoding(QMetalCommandBuffer *cbD)
{
    if (id<MTLComputeCommandEncoder> computeEncoder = cbD->d->tessellationComputeEncoder) {
        [computeEncoder endEncoding];
        cbD->d->tessellationComputeEncoder = nil;
    }

    // Look up the backend data of the current render target.
    QMetalRenderTargetData *rtD = nullptr;
    const auto targetType = cbD->currentTarget->resourceType();
    if (targetType == QRhiResource::SwapChainRenderTarget)
        rtD = QRHI_RES(QMetalSwapChainRenderTarget, cbD->currentTarget)->d;
    else if (targetType == QRhiResource::TextureRenderTarget)
        rtD = QRHI_RES(QMetalTextureRenderTarget, cbD->currentTarget)->d;

    Q_ASSERT(rtD);

    MTLRenderPassDescriptor *rpDesc = cbD->d->currentPassRpDesc;
    const uint colorCount = uint(rtD->colorAttCount);
    const bool hasDepthStencil = rtD->dsAttCount != 0;

    // Remember the original load actions, then request Load where applicable.
    QVarLengthArray<MTLLoadAction, 4> savedColorLoads;
    for (uint i = 0; i < colorCount; ++i) {
        MTLRenderPassColorAttachmentDescriptor *colorAtt = rpDesc.colorAttachments[i];
        savedColorLoads.append(colorAtt.loadAction);
        if (colorAtt.storeAction != MTLStoreActionDontCare)
            colorAtt.loadAction = MTLLoadActionLoad;
    }

    // Only meaningful (and only read back) when hasDepthStencil is true.
    MTLLoadAction savedDepthLoad = MTLLoadActionDontCare;
    MTLLoadAction savedStencilLoad = MTLLoadActionDontCare;
    if (hasDepthStencil) {
        savedDepthLoad = rpDesc.depthAttachment.loadAction;
        if (rpDesc.depthAttachment.storeAction != MTLStoreActionDontCare)
            rpDesc.depthAttachment.loadAction = MTLLoadActionLoad;

        savedStencilLoad = rpDesc.stencilAttachment.loadAction;
        if (rpDesc.stencilAttachment.storeAction != MTLStoreActionDontCare)
            rpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
    }

    cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: rpDesc];
    cbD->resetPerPassCachedState();

    // Restore the descriptor so later users see the original load actions.
    for (uint i = 0; i < colorCount; ++i)
        rpDesc.colorAttachments[i].loadAction = savedColorLoads[i];

    if (hasDepthStencil) {
        rpDesc.depthAttachment.loadAction = savedDepthLoad;
        rpDesc.stencilAttachment.loadAction = savedStencilLoad;
    }
}
2019
// Records a draw call for a pipeline with tessellation enabled. This is done
// in three steps:
//   1. the vertex shader is run as a compute shader,
//   2. the tessellation control shader is run as a compute shader (on the same
//      compute encoder),
//   3. a new render encoder is started and the tessellation evaluation shader
//      (as vertex stage) plus the fragment shader are run via drawPatches.
//
// args describes the draw call (indexed or non-indexed) and carries the
// command buffer. The function returns without drawing if the pipeline's
// tessellation setup has failed or if any of the intermediate work buffers
// cannot be acquired.
void QRhiMetal::tessellatedDraw(const TessDrawArgs &args)
{
    QMetalCommandBuffer *cbD = args.cbD;
    QMetalGraphicsPipeline *graphicsPipeline = cbD->currentGraphicsPipeline;
    if (graphicsPipeline->d->tess.failed)
        return;

    const bool indexed = args.type != TessDrawArgs::NonIndexed;
    const quint32 instanceCount = indexed ? args.drawIndexed.instanceCount : args.draw.instanceCount;
    const quint32 vertexOrIndexCount = indexed ? args.drawIndexed.indexCount : args.draw.vertexCount;

    QMetalGraphicsPipelineData::Tessellation &tess(graphicsPipeline->d->tess);
    QMetalGraphicsPipelineData::ExtraBufferManager &extraBufMgr(graphicsPipeline->d->extraBufMgr);
    const quint32 patchCount = tess.patchCountForDrawCall(vertexOrIndexCount, instanceCount);
    QMetalBuffer *vertOutBuf = nullptr;
    QMetalBuffer *tescOutBuf = nullptr;
    QMetalBuffer *tescPatchOutBuf = nullptr;
    QMetalBuffer *tescFactorBuf = nullptr;
    QMetalBuffer *tescParamsBuf = nullptr;
    // Ends the current render encoder, if any; steps 1 and 2 share this encoder.
    id<MTLComputeCommandEncoder> vertTescComputeEncoder = tessellationComputeEncoder(cbD);

    // Step 1: vertex shader (as compute)
    {
        id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
        QShader::Variant shaderVariant = QShader::NonIndexedVertexAsComputeShader;
        if (args.type == TessDrawArgs::U16Indexed)
            shaderVariant = QShader::UInt16IndexedVertexAsComputeShader;
        else if (args.type == TessDrawArgs::U32Indexed)
            shaderVariant = QShader::UInt32IndexedVertexAsComputeShader;
        const int varIndex = QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(shaderVariant);
        id<MTLComputePipelineState> computePipelineState = tess.vsCompPipeline(this, shaderVariant);
        [computeEncoder setComputePipelineState: computePipelineState];

        // Make uniform buffers, textures, and samplers (meant for the
        // vertex stage from the client's point of view) visible in the
        // "vertex as compute" shader
        cbD->d->currentComputePassEncoder = computeEncoder;
        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::VERTEX, QMetalShaderResourceBindingsData::COMPUTE);
        cbD->d->currentComputePassEncoder = nil;

        const QMap<int, int> &ebb(tess.compVs[varIndex].nativeShaderInfo.extraBufferBindings);
        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
        const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);

        if (outputBufferBinding >= 0) {
            const quint32 workBufSize = tess.vsCompOutputBufferSize(vertexOrIndexCount, instanceCount);
            vertOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
            if (!vertOutBuf)
                return;
            [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
        }

        if (indexBufferBinding >= 0)
            [computeEncoder setBuffer: (id<MTLBuffer>) args.drawIndexed.indexBuffer offset: 0 atIndex: indexBufferBinding];

        // Bind the currently set vertex input buffers, batch by batch.
        for (int i = 0, ie = cbD->d->currentVertexInputsBuffers.batches.count(); i != ie; ++i) {
            const auto &bufferBatch(cbD->d->currentVertexInputsBuffers.batches[i]);
            const auto &offsetBatch(cbD->d->currentVertexInputOffsets.batches[i]);
            [computeEncoder setBuffers: bufferBatch.resources.constData()
                               offsets: offsetBatch.resources.constData()
                             withRange: NSMakeRange(uint(cbD->d->currentFirstVertexBinding) + bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
        }

        if (indexed) {
            [computeEncoder setStageInRegion: MTLRegionMake2D(args.drawIndexed.vertexOffset, args.drawIndexed.firstInstance,
                                                              args.drawIndexed.indexCount, args.drawIndexed.instanceCount)];
        } else {
            [computeEncoder setStageInRegion: MTLRegionMake2D(args.draw.firstVertex, args.draw.firstInstance,
                                                              args.draw.vertexCount, args.draw.instanceCount)];
        }

        [computeEncoder dispatchThreads: MTLSizeMake(vertexOrIndexCount, instanceCount, 1)
                  threadsPerThreadgroup: MTLSizeMake(computePipelineState.threadExecutionWidth, 1, 1)];
    }

    // Step 2: tessellation control shader (as compute)
    {
        id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
        id<MTLComputePipelineState> computePipelineState = tess.tescCompPipeline(this);
        [computeEncoder setComputePipelineState: computePipelineState];

        cbD->d->currentComputePassEncoder = computeEncoder;
        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSCTRL, QMetalShaderResourceBindingsData::COMPUTE);
        cbD->d->currentComputePassEncoder = nil;

        const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
        const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
        const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
        const int paramsBufferBinding = ebb.value(QShaderPrivate::MslTessTescParamsBufferBinding, -1);
        const int inputBufferBinding = ebb.value(QShaderPrivate::MslTessTescInputBufferBinding, -1);

        if (outputBufferBinding >= 0) {
            const quint32 workBufSize = tess.tescCompOutputBufferSize(patchCount);
            tescOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
            if (!tescOutBuf)
                return;
            [computeEncoder setBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
        }

        if (patchOutputBufferBinding >= 0) {
            const quint32 workBufSize = tess.tescCompPatchOutputBufferSize(patchCount);
            tescPatchOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
            if (!tescPatchOutBuf)
                return;
            [computeEncoder setBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];
        }

        if (tessFactorBufferBinding >= 0) {
            tescFactorBuf = extraBufMgr.acquireWorkBuffer(this, patchCount * sizeof(MTLQuadTessellationFactorsHalf));
            // Bail out like for every other work buffer instead of
            // dereferencing a null pointer when acquisition fails.
            if (!tescFactorBuf)
                return;
            [computeEncoder setBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
        }

        if (paramsBufferBinding >= 0) {
            struct {
                quint32 inControlPointCount;
                quint32 patchCount;
            } params;
            tescParamsBuf = extraBufMgr.acquireWorkBuffer(this, sizeof(params), QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
            if (!tescParamsBuf)
                return;
            params.inControlPointCount = tess.inControlPointCount;
            params.patchCount = patchCount;
            // The buffer was requested as HostVisible, so write through its contents pointer.
            id<MTLBuffer> paramsBuf = tescParamsBuf->d->buf[0];
            char *p = reinterpret_cast<char *>([paramsBuf contents]);
            memcpy(p, &params, sizeof(params));
            [computeEncoder setBuffer: paramsBuf offset: 0 atIndex: paramsBufferBinding];
        }

        // The output of step 1 is the input of step 2.
        if (vertOutBuf && inputBufferBinding >= 0)
            [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: inputBufferBinding];

        // Pick a threadgroup size that is a common multiple of the output
        // control point count and the (possibly halved) execution width,
        // halving until it fits within caps.maxThreadGroupSize.
        int sgSize = int(computePipelineState.threadExecutionWidth);
        int wgSize = std::lcm(tess.outControlPointCount, sgSize);
        while (wgSize > caps.maxThreadGroupSize) {
            sgSize /= 2;
            wgSize = std::lcm(tess.outControlPointCount, sgSize);
        }
        [computeEncoder dispatchThreads: MTLSizeMake(patchCount * tess.outControlPointCount, 1, 1)
                  threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
    }

    // Much of the state in the QMetalCommandBuffer is going to be reset
    // when we get a new render encoder. Save what we need. (cheaper than
    // starting to walk over the srb again)
    const QMetalShaderResourceBindingsData resourceBindings = cbD->d->currentShaderResourceBindingState;

    endTessellationComputeEncoding(cbD);

    // Step 3: tessellation evaluation (as vertex) + fragment shader
    {
        // No need to call tess.teseFragRenderPipeline because it was done
        // once and we know the result is stored in the standard place
        // (graphicsPipeline->d->ps).

        graphicsPipeline->makeActiveForCurrentRenderPassEncoder(cbD);
        id<MTLRenderCommandEncoder> renderEncoder = cbD->d->currentRenderPassEncoder;

        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSEVAL, QMetalShaderResourceBindingsData::VERTEX, &resourceBindings);
        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::FRAGMENT, QMetalShaderResourceBindingsData::FRAGMENT, &resourceBindings);

        const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
        const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
        const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);

        if (outputBufferBinding >= 0 && tescOutBuf)
            [renderEncoder setVertexBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];

        if (patchOutputBufferBinding >= 0 && tescPatchOutBuf)
            [renderEncoder setVertexBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];

        if (tessFactorBufferBinding >= 0 && tescFactorBuf) {
            [renderEncoder setTessellationFactorBuffer: tescFactorBuf->d->buf[0] offset: 0 instanceStride: 0];
            [renderEncoder setVertexBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
        }

        [cbD->d->currentRenderPassEncoder drawPatches: tess.outControlPointCount
                                           patchStart: 0
                                           patchCount: patchCount
                                     patchIndexBuffer: nil
                               patchIndexBufferOffset: 0
                                        instanceCount: 1
                                         baseInstance: 0];
    }
}
2206
// Prepares a draw call for multiview rendering. Does nothing when the current
// graphics pipeline has a multiview count of 1 or less. Otherwise a small
// host visible buffer holding { viewOffset = 0, viewCount } is bound to the
// vertex stage at the binding the vertex shader reports for the multiview
// mask buffer, and *instanceCount is multiplied by the view count.
void QRhiMetal::adjustForMultiViewDraw(quint32 *instanceCount, QRhiCommandBuffer *cb)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    QMetalGraphicsPipeline *pipeline = cbD->currentGraphicsPipeline;

    const int multiViewCount = pipeline->m_multiViewCount;
    if (multiViewCount <= 1)
        return;

    const QMap<int, int> &extraBindings(pipeline->d->vs.nativeShaderInfo.extraBufferBindings);
    const int viewMaskBufBinding = extraBindings.value(QShaderPrivate::MslMultiViewMaskBufferBinding, -1);
    if (viewMaskBufBinding == -1) {
        qWarning("No extra buffer for multiview in the vertex shader; was it built with --view-count specified?");
        return;
    }

    struct {
        quint32 viewOffset;
        quint32 viewCount;
    } multiViewInfo;
    multiViewInfo.viewCount = quint32(multiViewCount);
    multiViewInfo.viewOffset = 0;

    QMetalBuffer *workBuf = pipeline->d->extraBufMgr.acquireWorkBuffer(
            this, sizeof(multiViewInfo),
            QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
    if (!workBuf)
        return; // instance count is left untouched when no buffer is available

    id<MTLBuffer> mtlbuf = workBuf->d->buf[0];
    memcpy([mtlbuf contents], &multiViewInfo, sizeof(multiViewInfo));
    [cbD->d->currentRenderPassEncoder setVertexBuffer: mtlbuf offset: 0 atIndex: viewMaskBufBinding];

    // The instance count is adjusted for layered rendering. The vertex shader is expected to contain something like:
    //    uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1];
    // where spvViewMask is the buffer with multiViewInfo passed in above.
    *instanceCount *= multiViewCount;
}
2239
// Records a non-indexed draw call. Pipelines with tessellation enabled are
// routed to tessellatedDraw(). Otherwise the instance count is adjusted for
// multiview and the primitives are drawn, passing firstInstance as the base
// instance only when caps.baseVertexAndInstance is set.
void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount,
                     quint32 instanceCount, quint32 firstVertex, quint32 firstInstance)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);

    QMetalGraphicsPipeline *pipeline = cbD->currentGraphicsPipeline;
    if (pipeline->d->tess.enabled) {
        TessDrawArgs tessArgs;
        tessArgs.cbD = cbD;
        tessArgs.type = TessDrawArgs::NonIndexed;
        tessArgs.draw.vertexCount = vertexCount;
        tessArgs.draw.instanceCount = instanceCount;
        tessArgs.draw.firstVertex = firstVertex;
        tessArgs.draw.firstInstance = firstInstance;
        tessellatedDraw(tessArgs);
        return;
    }

    adjustForMultiViewDraw(&instanceCount, cb);

    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;
    if (!caps.baseVertexAndInstance) {
        // No base instance support: firstInstance cannot be passed on.
        [encoder drawPrimitives: pipeline->d->primitiveType
                    vertexStart: firstVertex
                    vertexCount: vertexCount
                  instanceCount: instanceCount];
        return;
    }

    [encoder drawPrimitives: pipeline->d->primitiveType
                vertexStart: firstVertex
                vertexCount: vertexCount
              instanceCount: instanceCount
               baseInstance: firstInstance];
}
2268
// Records an indexed draw call using the index buffer, offset and format
// currently set on the command buffer. Does nothing when no index buffer is
// set. Pipelines with tessellation enabled are routed to tessellatedDraw().
// vertexOffset and firstInstance are only passed to Metal when
// caps.baseVertexAndInstance is set.
void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount,
                            quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);

    QMetalBuffer *ibufD = cbD->currentIndexBuffer;
    if (!ibufD)
        return;

    const bool is16Bit = cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16;

    // Byte offset of the first index to read; asserted to be 4 byte aligned.
    const quint32 indexOffset = cbD->currentIndexOffset + firstIndex * (is16Bit ? 2 : 4);
    Q_ASSERT(indexOffset == aligned(indexOffset, 4u));

    // Slotted buffers have one native buffer per frame slot.
    const int bufSlot = ibufD->d->slotted ? currentFrameSlot : 0;
    id<MTLBuffer> mtlibuf = ibufD->d->buf[bufSlot];

    QMetalGraphicsPipeline *pipeline = cbD->currentGraphicsPipeline;
    if (pipeline->d->tess.enabled) {
        TessDrawArgs tessArgs;
        tessArgs.cbD = cbD;
        tessArgs.type = is16Bit ? TessDrawArgs::U16Indexed : TessDrawArgs::U32Indexed;
        tessArgs.drawIndexed.indexCount = indexCount;
        tessArgs.drawIndexed.instanceCount = instanceCount;
        tessArgs.drawIndexed.firstIndex = firstIndex;
        tessArgs.drawIndexed.vertexOffset = vertexOffset;
        tessArgs.drawIndexed.firstInstance = firstInstance;
        tessArgs.drawIndexed.indexBuffer = mtlibuf;
        tessellatedDraw(tessArgs);
        return;
    }

    adjustForMultiViewDraw(&instanceCount, cb);

    id<MTLRenderCommandEncoder> encoder = cbD->d->currentRenderPassEncoder;
    if (!caps.baseVertexAndInstance) {
        [encoder drawIndexedPrimitives: pipeline->d->primitiveType
                            indexCount: indexCount
                             indexType: is16Bit ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
                           indexBuffer: mtlibuf
                     indexBufferOffset: indexOffset
                         instanceCount: instanceCount];
        return;
    }

    [encoder drawIndexedPrimitives: pipeline->d->primitiveType
                        indexCount: indexCount
                         indexType: is16Bit ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
                       indexBuffer: mtlibuf
                 indexBufferOffset: indexOffset
                     instanceCount: instanceCount
                        baseVertex: vertexOffset
                      baseInstance: firstInstance];
}
2318
// Returns the encoder that debug markers should be recorded on while a pass
// is being recorded on cbD: the render pass encoder for render passes, the
// compute pass encoder otherwise. Must only be called when recordingPass is
// not NoPass.
// NOTE(review): assumes the only non-render pass type is a compute pass whose
// encoder is stored in currentComputePassEncoder -- confirm against the pass
// type enum. If that encoder is nil, messaging it is a harmless no-op.
static id<MTLCommandEncoder> qrhimtl_debugMarkerEncoder(QMetalCommandBuffer *cbD)
{
    if (cbD->recordingPass == QMetalCommandBuffer::RenderPass)
        return cbD->d->currentRenderPassEncoder;
    return cbD->d->currentComputePassEncoder;
}

// Opens a debug group called name. Inside a pass the group is pushed on the
// encoder of that pass, outside of a pass on the command buffer itself. Does
// nothing unless debug markers are enabled.
void QRhiMetal::debugMarkBegin(QRhiCommandBuffer *cb, const QByteArray &name)
{
    if (!debugMarkers)
        return;

    NSString *str = [NSString stringWithUTF8String: name.constData()];
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    if (cbD->recordingPass != QMetalCommandBuffer::NoPass)
        [qrhimtl_debugMarkerEncoder(cbD) pushDebugGroup: str];
    else
        [cbD->d->cb pushDebugGroup: str];
}

// Closes the debug group most recently opened with debugMarkBegin(), using
// the same choice of encoder vs. command buffer. Does nothing unless debug
// markers are enabled.
void QRhiMetal::debugMarkEnd(QRhiCommandBuffer *cb)
{
    if (!debugMarkers)
        return;

    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    if (cbD->recordingPass != QMetalCommandBuffer::NoPass)
        [qrhimtl_debugMarkerEncoder(cbD) popDebugGroup];
    else
        [cbD->d->cb popDebugGroup];
}

// Inserts msg as a debug signpost on the encoder of the pass being recorded.
// Ignored outside of a pass and when debug markers are disabled.
void QRhiMetal::debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg)
{
    if (!debugMarkers)
        return;

    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    if (cbD->recordingPass != QMetalCommandBuffer::NoPass)
        [qrhimtl_debugMarkerEncoder(cbD) insertDebugSignpost: [NSString stringWithUTF8String: msg.constData()]];
}
2353
// Returns the native handles exposed by the backend command buffer behind cb.
const QRhiNativeHandles *QRhiMetal::nativeHandles(QRhiCommandBuffer *cb)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    return cbD->nativeHandles();
}
2358
// Nothing needs to be done in this backend when external (non-QRhi) command
// recording begins; cb is ignored.
void QRhiMetal::beginExternal(QRhiCommandBuffer *cb)
{
    Q_UNUSED(cb);
}
2363
// Called when external command recording ends: drops the per-pass state
// cached on the command buffer.
void QRhiMetal::endExternal(QRhiCommandBuffer *cb)
{
    QRHI_RES(QMetalCommandBuffer, cb)->resetPerPassCachedState();
}
2369
// Returns the GPU time value currently stored on the backend command buffer
// behind cb.
double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb)
{
    return QRHI_RES(QMetalCommandBuffer, cb)->d->lastGpuTime;
}
2375
// Starts a new frame on swapChain: waits until the frame slot to be used is
// free, creates a fresh command buffer, sets up the swapchain's render target
// data for this frame, and runs deferred releases and pending readbacks.
// flags is not used. Always returns QRhi::FrameOpSuccess.
QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
{
    Q_UNUSED(flags);

    QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
    currentSwapChain = swapChainD;
    currentFrameSlot = swapChainD->currentFrameSlot;

    // If we are too far ahead, block. This is also what ensures that any
    // resource used in the previous frame for this slot is now not in use
    // anymore by the GPU.
    dispatch_semaphore_wait(swapChainD->d->sem[currentFrameSlot], DISPATCH_TIME_FOREVER);

    // Do this also for any other swapchain's commands with the same frame slot.
    // While this reduces concurrency, it keeps resource usage safe: swapchain
    // A starting its frame 0, followed by swapchain B starting its own frame 0
    // will make B wait for A's frame 0 commands, so if a resource is written
    // in B's frame or when B checks for pending resource releases, that won't
    // mess up A's in-flight commands (as they are not in flight anymore).
    for (QMetalSwapChain *other : std::as_const(swapchains)) {
        if (other == swapChainD)
            continue;
        other->waitUntilCompleted(currentFrameSlot); // wait+signal
    }

    [d->captureScope beginScope];

    swapChainD->cbWrapper.d->cb = d->newCommandBuffer();

    // With multisampling the MSAA texture is rendered to and the drawable is
    // only needed as the resolve target; otherwise the drawable is the color
    // attachment itself.
    QMetalRenderTargetData::ColorAtt colorAtt;
    if (swapChainD->samples > 1) {
        colorAtt.tex = swapChainD->d->msaaTex[currentFrameSlot];
        colorAtt.needsDrawableForResolveTex = true;
    } else {
        colorAtt.needsDrawableForTex = true;
    }

    const bool hasDepthStencil = swapChainD->ds != nullptr;
    auto &fb = swapChainD->rtWrapper.d->fb;
    fb.colorAtt[0] = colorAtt;
    fb.dsTex = swapChainD->ds ? swapChainD->ds->d->tex : nil;
    fb.dsResolveTex = nil;
    fb.hasStencil = hasDepthStencil;
    fb.depthNeedsStore = false;

    if (hasDepthStencil)
        swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;

    executeDeferredReleases();
    // Hand the GPU time accumulated for this slot to the command buffer
    // wrapper, then start accumulating from zero again.
    swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]);
    swapChainD->d->lastGpuTime[currentFrameSlot] = 0;
    finishActiveReadbacks();

    return QRhi::FrameOpSuccess;
}
2428
// Ends the frame started with beginFrame() on swapChain: registers the
// completion handler that records GPU time and signals the frame slot's
// semaphore, commits the command buffer, and, unless QRhi::SkipPresent is
// set, arranges for the current drawable (if any) to be presented. The frame
// slot is only advanced when SkipPresent is not set. Always returns
// QRhi::FrameOpSuccess.
QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags)
{
    QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
    Q_ASSERT(currentSwapChain == swapChainD);

    // Keep strong reference to command buffer
    id<MTLCommandBuffer> commandBuffer = swapChainD->cbWrapper.d->cb;

    // Capture the slot by value now; currentFrameSlot may have moved on by
    // the time the handler runs.
    __block int thisFrameSlot = currentFrameSlot;
    [commandBuffer addCompletedHandler: ^(id<MTLCommandBuffer> cb) {
        swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
        dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
    }];

    // NOTE(review): the opening directive of this conditional was missing and
    // has been restored with the macro that selects command buffers with
    // unretained references -- confirm the macro name against its definition
    // near the top of this file.
#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
    // When Metal API validation diagnostics is enabled in Xcode the texture is
    // released before the command buffer is done with it. Manually keep it alive
    // to work around this.
    id<MTLTexture> drawableTexture = [swapChainD->d->curDrawable.texture retain];
    [commandBuffer addCompletedHandler:^(id<MTLCommandBuffer>) {
        [drawableTexture release];
    }];
#endif

    if (flags.testFlag(QRhi::SkipPresent)) {
        // Just need to commit, that's it
        [commandBuffer commit];
    } else {
        if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) {
            // Got something to present
            if (swapChainD->d->layer.presentsWithTransaction) {
                [commandBuffer commit];
                // Keep strong reference to Metal layer
                auto *metalLayer = swapChainD->d->layer;
                auto presentWithTransaction = ^{
                    [commandBuffer waitUntilScheduled];
                    // If the layer has been resized while we waited to be scheduled we bail out,
                    // as the drawable is no longer valid for the layer, and we'll get a follow-up
                    // display with the right size. We know we are on the main thread here, which
                    // means we can access the layer directly. We also know that the layer is valid,
                    // since the block keeps a strong reference to it, compared to the QRhiSwapChain
                    // that can go away under our feet by the time we're scheduled.
                    const auto surfaceSize = QSizeF::fromCGSize(metalLayer.bounds.size) * metalLayer.contentsScale;
                    const auto textureSize = QSizeF(drawable.texture.width, drawable.texture.height);
                    if (textureSize == surfaceSize) {
                        [drawable present];
                    } else {
                        qCDebug(QRHI_LOG_INFO) << "Skipping" << drawable << "due to texture size"
                                               << textureSize << "not matching surface size" << surfaceSize;
                    }
                };

                if (NSThread.currentThread == NSThread.mainThread) {
                    presentWithTransaction();
                } else {
                    auto *qtMetalLayer = qt_objc_cast<QMetalLayer*>(swapChainD->d->layer);
                    Q_ASSERT(qtMetalLayer);
                    // Let the main thread present the drawable from displayLayer
                    qtMetalLayer.mainThreadPresentation = presentWithTransaction;
                }
            } else {
                // Keep strong reference to Metal layer so it's valid in the block
                auto *qtMetalLayer = qt_objc_cast<QMetalLayer*>(swapChainD->d->layer);
                [commandBuffer addScheduledHandler:^(id<MTLCommandBuffer>) {
                    if (qtMetalLayer) {
                        // The schedule handler comes in on the com.Metal.CompletionQueueDispatch
                        // thread, which means we might be racing against a display cycle on the
                        // main thread. If the displayLayer is already in progress, we don't want
                        // to step on its toes.
                        if (qtMetalLayer.displayLock.tryLockForRead()) {
                            [drawable present];
                            qtMetalLayer.displayLock.unlock();
                        } else {
                            qCDebug(QRHI_LOG_INFO) << "Skipping" << drawable
                                                   << "due to" << qtMetalLayer << "needing display";
                        }
                    } else {
                        [drawable present];
                    }
                }];
                [commandBuffer commit];
            }
        } else {
            // Still need to commit, even if we don't have a drawable
            [commandBuffer commit];
        }

        swapChainD->currentFrameSlot = (swapChainD->currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT;
    }

    // Must not hold on to the drawable, regardless of needsPresent
    [swapChainD->d->curDrawable release];
    swapChainD->d->curDrawable = nil;

    [d->captureScope endScope];

    swapChainD->frameCount += 1;
    currentSwapChain = nullptr;
    return QRhi::FrameOpSuccess;
}
2529
// Starts an offscreen frame: advances to the next frame slot, waits for all
// swapchains' work on that slot, creates a fresh command buffer and returns
// the offscreen command buffer wrapper in *cb. flags is not used. Always
// returns QRhi::FrameOpSuccess.
QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi::BeginFrameFlags flags)
{
    Q_UNUSED(flags);

    const int nextSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT;
    currentFrameSlot = nextSlot;

    for (QMetalSwapChain *swapChainD : std::as_const(swapchains))
        swapChainD->waitUntilCompleted(currentFrameSlot);

    auto &offscreen = d->ofr;
    offscreen.active = true;
    *cb = &offscreen.cbWrapper;
    offscreen.cbWrapper.d->cb = d->newCommandBuffer();

    executeDeferredReleases();
    // Pass on the GPU time recorded for the previous offscreen frame, then
    // start from zero again.
    offscreen.cbWrapper.resetState(offscreen.lastGpuTime);
    offscreen.lastGpuTime = 0;
    finishActiveReadbacks();

    return QRhi::FrameOpSuccess;
}
2550
// Ends the offscreen frame: commits the command buffer, blocks until it has
// completed, accumulates its GPU time and completes pending readbacks. flags
// is not used. Always returns QRhi::FrameOpSuccess.
QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags)
{
    Q_UNUSED(flags);
    Q_ASSERT(d->ofr.active);
    d->ofr.active = false;

    id<MTLCommandBuffer> commandBuffer = d->ofr.cbWrapper.d->cb;
    [commandBuffer commit];

    // offscreen frames wait for completion, unlike swapchain ones
    [commandBuffer waitUntilCompleted];

    const auto gpuTime = commandBuffer.GPUEndTime - commandBuffer.GPUStartTime;
    d->ofr.lastGpuTime += gpuTime;

    finishActiveReadbacks(true);

    return QRhi::FrameOpSuccess;
}
2569
// Waits for all submitted work to complete. When called while recording a
// frame, the frame's current command buffer is committed and waited for, its
// GPU time is accumulated, and a new command buffer is created so recording
// can continue. Afterwards deferred releases and pending readbacks are
// processed in forced mode. Always returns QRhi::FrameOpSuccess.
QRhi::FrameOpResult QRhiMetal::finish()
{
    id<MTLCommandBuffer> activeCb = nil;
    QMetalSwapChain *swapChainD = nullptr;

    // Pick the command buffer of the frame being recorded, if there is one.
    // finish() must not be called while a pass is being recorded.
    if (inFrame) {
        if (!d->ofr.active) {
            Q_ASSERT(currentSwapChain);
            swapChainD = currentSwapChain;
            Q_ASSERT(swapChainD->cbWrapper.recordingPass == QMetalCommandBuffer::NoPass);
            activeCb = swapChainD->cbWrapper.d->cb;
        } else {
            Q_ASSERT(!currentSwapChain);
            Q_ASSERT(d->ofr.cbWrapper.recordingPass == QMetalCommandBuffer::NoPass);
            activeCb = d->ofr.cbWrapper.d->cb;
        }
    }

    for (QMetalSwapChain *sc : std::as_const(swapchains)) {
        for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot) {
            // No wait for the current frame of the current swapchain: this is
            // what gets committed below, beginFrame has already decremented
            // its semaphore and endFrame is going to arrange for the signal.
            const bool isFrameBeingRecorded =
                    currentSwapChain && sc == currentSwapChain && slot == currentFrameSlot;
            if (!isFrameBeingRecorded)
                sc->waitUntilCompleted(slot);
        }
    }

    if (activeCb) {
        [activeCb commit];
        [activeCb waitUntilCompleted];
    }

    if (inFrame) {
        const auto gpuTime = activeCb.GPUEndTime - activeCb.GPUStartTime;
        if (d->ofr.active) {
            d->ofr.lastGpuTime += gpuTime;
            d->ofr.cbWrapper.d->cb = d->newCommandBuffer();
        } else {
            swapChainD->d->lastGpuTime[currentFrameSlot] += gpuTime;
            swapChainD->cbWrapper.d->cb = d->newCommandBuffer();
        }
    }

    executeDeferredReleases(true);

    finishActiveReadbacks(true);

    return QRhi::FrameOpSuccess;
}
2619
// Creates a render pass descriptor with the backend's defaults:
//  - the first colorAttCount color attachments are cleared to colorClearValue
//    on load and stored,
//  - when hasDepthStencil is set, depth and stencil are cleared to the values
//    in depthStencilClearValue on load and use the DontCare store action,
//  - when shadingRateMap is given, its rate map is set as the descriptor's
//    rasterization rate map.
// The descriptor comes from the +renderPassDescriptor convenience
// constructor; no explicit retain is performed here.
MTLRenderPassDescriptor *QRhiMetalData::createDefaultRenderPass(bool hasDepthStencil,
                                                                const QColor &colorClearValue,
                                                                const QRhiDepthStencilClearValue &depthStencilClearValue,
                                                                int colorAttCount,
                                                                QRhiShadingRateMap *shadingRateMap)
{
    MTLRenderPassDescriptor *rpDesc = [MTLRenderPassDescriptor renderPassDescriptor];

    const MTLClearColor clearColor = MTLClearColorMake(colorClearValue.redF(),
                                                       colorClearValue.greenF(),
                                                       colorClearValue.blueF(),
                                                       colorClearValue.alphaF());
    const uint colorCount = uint(colorAttCount);
    for (uint i = 0; i < colorCount; ++i) {
        MTLRenderPassColorAttachmentDescriptor *colorAtt = rpDesc.colorAttachments[i];
        colorAtt.loadAction = MTLLoadActionClear;
        colorAtt.storeAction = MTLStoreActionStore;
        colorAtt.clearColor = clearColor;
    }

    if (hasDepthStencil) {
        rpDesc.depthAttachment.loadAction = MTLLoadActionClear;
        rpDesc.depthAttachment.storeAction = MTLStoreActionDontCare;
        rpDesc.depthAttachment.clearDepth = double(depthStencilClearValue.depthClearValue());

        rpDesc.stencilAttachment.loadAction = MTLLoadActionClear;
        rpDesc.stencilAttachment.storeAction = MTLStoreActionDontCare;
        rpDesc.stencilAttachment.clearStencil = depthStencilClearValue.stencilClearValue();
    }

    if (shadingRateMap) {
        QMetalShadingRateMap *rateMapD = QRHI_RES(QMetalShadingRateMap, shadingRateMap);
        rpDesc.rasterizationRateMap = rateMapD->d->rateMap;
    }

    return rpDesc;
}
2650
// Returns the number of staging buffer bytes to reserve for the upload
// described by subresDesc: the byte size of the image if one is set,
// otherwise the size of the raw data, rounded up to
// QRhiMetalData::TEXBUF_ALIGN. Returns 0 when there is nothing to upload.
qsizetype QRhiMetal::subresUploadByteSize(const QRhiTextureSubresourceUploadDescription &subresDesc) const
{
    const QImage image = subresDesc.image();
    const qsizetype payloadBytes = image.isNull() ? subresDesc.data().size()
                                                  : image.sizeInBytes();
    if (payloadBytes <= 0)
        return 0;

    return aligned<qsizetype>(payloadBytes, QRhiMetalData::TEXBUF_ALIGN);
}
2660
// Copies the payload of one subresource upload into the mapped staging
// memory (mp) at offset *curOfs, records the matching buffer-to-texture copy
// on the blit encoder passed in as blitEncPtr, and then advances *curOfs by
// the TEXBUF_ALIGN-aligned payload size (the same amount that
// subresUploadByteSize() reports for this description).
//
// Three payload kinds are handled: a QImage, raw data for a compressed
// format, and raw data for an uncompressed format. Anything else only
// produces a warning and leaves *curOfs untouched.
//
// For 3D textures 'layer' selects the destination depth slice (z origin),
// otherwise it selects the destination array slice.
void QRhiMetal::enqueueSubresUpload(QMetalTexture *texD, void *mp, void *blitEncPtr,
                                    int layer, int level, const QRhiTextureSubresourceUploadDescription &subresDesc,
                                    qsizetype *curOfs)
{
    const QPoint dp = subresDesc.destinationTopLeft();
    const QByteArray rawData = subresDesc.data();
    QImage img = subresDesc.image();
    const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
    id<MTLBlitCommandEncoder> blitEnc = (id<MTLBlitCommandEncoder>) blitEncPtr;

    // *curOfs is only advanced at the end of each branch, so this stays valid.
    char *stagingDst = reinterpret_cast<char *>(mp) + *curOfs;

    if (!img.isNull()) {
        const qsizetype fullImageSizeBytes = img.sizeInBytes();
        int w = img.width();
        int h = img.height();
        int bpl = img.bytesPerLine();

        if (!subresDesc.sourceSize().isEmpty() || !subresDesc.sourceTopLeft().isNull()) {
            const int sx = subresDesc.sourceTopLeft().x();
            const int sy = subresDesc.sourceTopLeft().y();
            if (!subresDesc.sourceSize().isEmpty()) {
                w = subresDesc.sourceSize().width();
                h = subresDesc.sourceSize().height();
            }

            // Full-width rows can be copied straight out of the source image,
            // but only when the requested rows really lie inside it. A source
            // offset without a source size (h == img.height(), sy > 0), or a
            // non-zero sx with full width, would otherwise read past the end
            // of the image data. Such requests take the QImage::copy() route
            // below, which stays within bounds.
            const bool rowsInBounds = sx == 0 && sy >= 0 && sy + h <= img.height();
            if (w == img.width() && rowsInBounds) {
                const qsizetype srcBpl = img.bytesPerLine();
                Q_ASSERT(h * srcBpl <= fullImageSizeBytes);
                memcpy(stagingDst, img.constBits() + sy * srcBpl, size_t(h * srcBpl));
            } else {
                img = img.copy(sx, sy, w, h);
                bpl = img.bytesPerLine();
                Q_ASSERT(img.sizeInBytes() <= fullImageSizeBytes);
                memcpy(stagingDst, img.constBits(), size_t(img.sizeInBytes()));
            }
        } else {
            memcpy(stagingDst, img.constBits(), size_t(fullImageSizeBytes));
        }

        [blitEnc copyFromBuffer: texD->d->stagingBuf[currentFrameSlot]
                                 sourceOffset: NSUInteger(*curOfs)
                                 sourceBytesPerRow: NSUInteger(bpl)
                                 sourceBytesPerImage: 0
                                 sourceSize: MTLSizeMake(NSUInteger(w), NSUInteger(h), 1)
                                 toTexture: texD->d->tex
                                 destinationSlice: NSUInteger(is3D ? 0 : layer)
                                 destinationLevel: NSUInteger(level)
                                 destinationOrigin: MTLOriginMake(NSUInteger(dp.x()), NSUInteger(dp.y()), NSUInteger(is3D ? layer : 0))
                                 options: MTLBlitOptionNone];

        // The reservation is always based on the full image size, even when
        // only a sub-rectangle was copied.
        *curOfs += aligned<qsizetype>(fullImageSizeBytes, QRhiMetalData::TEXBUF_ALIGN);
    } else if (!rawData.isEmpty() && isCompressedFormat(texD->m_format)) {
        const QSize subresSize = q->sizeForMipLevel(level, texD->m_pixelSize);
        const int subresw = subresSize.width();
        const int subresh = subresSize.height();
        int w, h;
        if (subresDesc.sourceSize().isEmpty()) {
            w = subresw;
            h = subresh;
        } else {
            w = subresDesc.sourceSize().width();
            h = subresDesc.sourceSize().height();
        }

        quint32 bpl = 0;
        QSize blockDim;
        compressedFormatInfo(texD->m_format, QSize(w, h), &bpl, nullptr, &blockDim);

        // Destination origin is aligned to the block dimensions; the copy
        // extent is aligned too unless it ends exactly at the subresource edge.
        const int dx = aligned(dp.x(), blockDim.width());
        const int dy = aligned(dp.y(), blockDim.height());
        if (dx + w != subresw)
            w = aligned(w, blockDim.width());
        if (dy + h != subresh)
            h = aligned(h, blockDim.height());

        memcpy(stagingDst, rawData.constData(), size_t(rawData.size()));

        [blitEnc copyFromBuffer: texD->d->stagingBuf[currentFrameSlot]
                                 sourceOffset: NSUInteger(*curOfs)
                                 sourceBytesPerRow: bpl
                                 sourceBytesPerImage: 0
                                 sourceSize: MTLSizeMake(NSUInteger(w), NSUInteger(h), 1)
                                 toTexture: texD->d->tex
                                 destinationSlice: NSUInteger(is3D ? 0 : layer)
                                 destinationLevel: NSUInteger(level)
                                 destinationOrigin: MTLOriginMake(NSUInteger(dx), NSUInteger(dy), NSUInteger(is3D ? layer : 0))
                                 options: MTLBlitOptionNone];

        *curOfs += aligned<qsizetype>(rawData.size(), QRhiMetalData::TEXBUF_ALIGN);
    } else if (!rawData.isEmpty()) {
        const QSize subresSize = q->sizeForMipLevel(level, texD->m_pixelSize);
        const int subresw = subresSize.width();
        const int subresh = subresSize.height();
        int w, h;
        if (subresDesc.sourceSize().isEmpty()) {
            w = subresw;
            h = subresh;
        } else {
            w = subresDesc.sourceSize().width();
            h = subresDesc.sourceSize().height();
        }

        // An explicit data stride takes precedence over the stride derived
        // from the texture format and the copy size.
        quint32 bpl = 0;
        if (subresDesc.dataStride())
            bpl = subresDesc.dataStride();
        else
            textureFormatInfo(texD->m_format, QSize(w, h), &bpl, nullptr, nullptr);

        memcpy(stagingDst, rawData.constData(), size_t(rawData.size()));

        [blitEnc copyFromBuffer: texD->d->stagingBuf[currentFrameSlot]
                                 sourceOffset: NSUInteger(*curOfs)
                                 sourceBytesPerRow: bpl
                                 sourceBytesPerImage: 0
                                 sourceSize: MTLSizeMake(NSUInteger(w), NSUInteger(h), 1)
                                 toTexture: texD->d->tex
                                 destinationSlice: NSUInteger(is3D ? 0 : layer)
                                 destinationLevel: NSUInteger(level)
                                 destinationOrigin: MTLOriginMake(NSUInteger(dp.x()), NSUInteger(dp.y()), NSUInteger(is3D ? layer : 0))
                                 options: MTLBlitOptionNone];

        *curOfs += aligned<qsizetype>(rawData.size(), QRhiMetalData::TEXBUF_ALIGN);
    } else {
        qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level);
    }
}
2787
// Processes all buffer and texture operations queued in the given resource
// update batch, then frees the batch.
//
// Buffer updates/uploads are only queued into the per-slot pendingUpdates
// lists here; buffer reads either complete immediately (Dynamic buffers) or
// are registered as active readbacks. Texture uploads, copies, readbacks and
// mipmap generation are recorded on a blit encoder that is created lazily on
// the command buffer and ended before returning.
void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
{
    using BufferOp = QRhiResourceUpdateBatchPrivate::BufferOp;
    using TextureOp = QRhiResourceUpdateBatchPrivate::TextureOp;

    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    QRhiResourceUpdateBatchPrivate *batch = QRhiResourceUpdateBatchPrivate::get(resourceUpdates);

    // The blit encoder is only created once an operation actually needs it.
    id<MTLBlitCommandEncoder> blitEnc = nil;
    auto ensureBlit = [&blitEnc, cbD, this]() {
        if (blitEnc)
            return;
        blitEnc = [cbD->d->cb blitCommandEncoder];
        if (debugMarkers)
            [blitEnc pushDebugGroup: @"Texture upload/copy"];
    };

    for (int opIdx = 0; opIdx < batch->activeBufferOpCount; ++opIdx) {
        const BufferOp &op(batch->bufferOps[opIdx]);
        switch (op.type) {
        case BufferOp::DynamicUpdate:
        {
            QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, op.buf);
            Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic);
            // An update covering the whole buffer makes everything queued
            // before it redundant.
            const bool replacesEverything = op.offset == 0 && op.data.size() == bufD->m_size;
            for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot) {
                if (replacesEverything)
                    bufD->d->pendingUpdates[slot].clear();
                bufD->d->pendingUpdates[slot].append({ op.offset, op.data });
            }
        }
            break;
        case BufferOp::StaticUpload:
        {
            // Due to the Metal API the handling of static and dynamic buffers is
            // basically the same. So go through the same pendingUpdates machinery.
            QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, op.buf);
            Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic);
            Q_ASSERT(op.offset + op.data.size() <= bufD->m_size);
            const int slotCount = bufD->d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1;
            for (int slot = 0; slot != slotCount; ++slot)
                bufD->d->pendingUpdates[slot].append({ op.offset, op.data });
        }
            break;
        case BufferOp::Read:
        {
            QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, op.buf);
            // Flush queued host writes first so the read sees them.
            executeBufferHostWritesForCurrentFrame(bufD);
            const int slotIdx = bufD->d->slotted ? currentFrameSlot : 0;
            if (bufD->m_type == QRhiBuffer::Dynamic) {
                // Read directly from the buffer contents and complete right away.
                char *contents = reinterpret_cast<char *>([bufD->d->buf[slotIdx] contents]);
                if (contents) {
                    op.result->data.resize(op.readSize);
                    memcpy(op.result->data.data(), contents + op.offset, size_t(op.readSize));
                }
                if (op.result->completed)
                    op.result->completed();
            } else {
                // Completed later, in finishActiveReadbacks().
                QRhiMetalData::BufferReadback readback;
                readback.activeFrameSlot = slotIdx;
                readback.buf = bufD->d->buf[slotIdx];
                readback.offset = op.offset;
                readback.readSize = op.readSize;
                readback.result = op.result;
                d->activeBufferReadbacks.append(readback);
#ifdef Q_OS_MACOS
                if (bufD->d->managed) {
                    // On non-Apple Silicon, manually synchronize memory from GPU to CPU
                    ensureBlit();
                    [blitEnc synchronizeResource:readback.buf];
                }
#endif
            }
        }
            break;
        default:
            break;
        }
    }

    for (int opIdx = 0; opIdx < batch->activeTextureOpCount; ++opIdx) {
        const TextureOp &op(batch->textureOps[opIdx]);
        switch (op.type) {
        case TextureOp::Upload:
        {
            QMetalTexture *dstTexD = QRHI_RES(QMetalTexture, op.dst);
            const int layerCount = int(op.subresDesc.count());

            // First pass: total staging space needed for all subresources.
            qsizetype stagingBytes = 0;
            for (int layer = 0; layer < layerCount; ++layer) {
                for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(op.subresDesc[layer][level]))
                        stagingBytes += subresUploadByteSize(subresDesc);
                }
            }

            ensureBlit();
            Q_ASSERT(!dstTexD->d->stagingBuf[currentFrameSlot]);
            dstTexD->d->stagingBuf[currentFrameSlot] = [d->dev newBufferWithLength: NSUInteger(stagingBytes)
                                                                          options: MTLResourceStorageModeShared];

            // Second pass: fill the staging buffer and record the blits.
            void *mapped = [dstTexD->d->stagingBuf[currentFrameSlot] contents];
            qsizetype writeOfs = 0;
            for (int layer = 0; layer < layerCount; ++layer) {
                for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(op.subresDesc[layer][level]))
                        enqueueSubresUpload(dstTexD, mapped, blitEnc, layer, level, subresDesc, &writeOfs);
                }
            }

            dstTexD->lastActiveFrameSlot = currentFrameSlot;

            // Hand the staging buffer over to the deferred release queue.
            QRhiMetalData::DeferredReleaseEntry releaseEntry;
            releaseEntry.type = QRhiMetalData::DeferredReleaseEntry::StagingBuffer;
            releaseEntry.lastActiveFrameSlot = currentFrameSlot;
            releaseEntry.stagingBuffer.buffer = dstTexD->d->stagingBuf[currentFrameSlot];
            dstTexD->d->stagingBuf[currentFrameSlot] = nil;
            d->releaseQueue.append(releaseEntry);
        }
            break;
        case TextureOp::Copy:
        {
            Q_ASSERT(op.src && op.dst);
            QMetalTexture *srcD = QRHI_RES(QMetalTexture, op.src);
            QMetalTexture *dstD = QRHI_RES(QMetalTexture, op.dst);
            const bool srcIs3D = srcD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
            const bool dstIs3D = dstD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
            const QPoint dstPos = op.desc.destinationTopLeft();
            const QSize mipSize = q->sizeForMipLevel(op.desc.sourceLevel(), srcD->m_pixelSize);
            // An empty pixel size means "the whole source mip level".
            const QSize copySize = op.desc.pixelSize().isEmpty() ? mipSize : op.desc.pixelSize();
            const QPoint srcPos = op.desc.sourceTopLeft();

            ensureBlit();
            [blitEnc copyFromTexture: srcD->d->tex
                                      sourceSlice: NSUInteger(srcIs3D ? 0 : op.desc.sourceLayer())
                                      sourceLevel: NSUInteger(op.desc.sourceLevel())
                                      sourceOrigin: MTLOriginMake(NSUInteger(srcPos.x()), NSUInteger(srcPos.y()), NSUInteger(srcIs3D ? op.desc.sourceLayer() : 0))
                                      sourceSize: MTLSizeMake(NSUInteger(copySize.width()), NSUInteger(copySize.height()), 1)
                                      toTexture: dstD->d->tex
                                      destinationSlice: NSUInteger(dstIs3D ? 0 : op.desc.destinationLayer())
                                      destinationLevel: NSUInteger(op.desc.destinationLevel())
                                      destinationOrigin: MTLOriginMake(NSUInteger(dstPos.x()), NSUInteger(dstPos.y()), NSUInteger(dstIs3D ? op.desc.destinationLayer() : 0))];

            srcD->lastActiveFrameSlot = dstD->lastActiveFrameSlot = currentFrameSlot;
        }
            break;
        case TextureOp::Read:
        {
            QRhiMetalData::TextureReadback readback;
            readback.activeFrameSlot = currentFrameSlot;
            readback.desc = op.rb;
            readback.result = op.result;

            // A null texture in the readback description means reading back
            // from the current swapchain.
            QMetalTexture *texD = QRHI_RES(QMetalTexture, op.rb.texture());
            QMetalSwapChain *swapChainD = nullptr;
            id<MTLTexture> srcTex = nil;
            QSize srcSize;
            bool is3D = false;
            if (texD) {
                if (texD->samples > 1) {
                    qWarning("Multisample texture cannot be read back");
                    continue; // next texture operation
                }
                is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional);
                readback.pixelSize = q->sizeForMipLevel(op.rb.level(), texD->m_pixelSize);
                readback.format = texD->m_format;
                srcTex = texD->d->tex;
                srcSize = readback.pixelSize;
                texD->lastActiveFrameSlot = currentFrameSlot;
            } else {
                Q_ASSERT(currentSwapChain);
                swapChainD = QRHI_RES(QMetalSwapChain, currentSwapChain);
                readback.pixelSize = swapChainD->pixelSize;
                readback.format = swapChainD->d->rhiColorFormat;
                // Multisample swapchains need nothing special since resolving
                // happens when ending a renderpass.
                const QMetalRenderTargetData::ColorAtt &colorAtt(swapChainD->rtWrapper.d->fb.colorAtt[0]);
                srcTex = colorAtt.resolveTex ? colorAtt.resolveTex : colorAtt.tex;
                srcSize = swapChainD->rtWrapper.d->pixelSize;
            }

            quint32 bytesPerRow = 0;
            textureFormatInfo(readback.format, readback.pixelSize, &bytesPerRow, &readback.bufSize, nullptr);
            readback.buf = [d->dev newBufferWithLength: readback.bufSize options: MTLResourceStorageModeShared];

            ensureBlit();
            [blitEnc copyFromTexture: srcTex
                                      sourceSlice: NSUInteger(is3D ? 0 : op.rb.layer())
                                      sourceLevel: NSUInteger(op.rb.level())
                                      sourceOrigin: MTLOriginMake(0, 0, is3D ? op.rb.layer() : 0)
                                      sourceSize: MTLSizeMake(NSUInteger(srcSize.width()), NSUInteger(srcSize.height()), 1)
                                      toBuffer: readback.buf
                                      destinationOffset: 0
                                      destinationBytesPerRow: bytesPerRow
                                      destinationBytesPerImage: 0
                                      options: MTLBlitOptionNone];

            d->activeTextureReadbacks.append(readback);
        }
            break;
        case TextureOp::GenMips:
        {
            QMetalTexture *mipTexD = QRHI_RES(QMetalTexture, op.dst);
            ensureBlit();
            [blitEnc generateMipmapsForTexture: mipTexD->d->tex];
            mipTexD->lastActiveFrameSlot = currentFrameSlot;
        }
            break;
        default:
            break;
        }
    }

    if (blitEnc) {
        if (debugMarkers)
            [blitEnc popDebugGroup];
        [blitEnc endEncoding];
    }

    batch->free();
}
2975
// Applies all queued host writes of the given slot to the buffer contents
// and clears the queue. This handles all types of buffers, not just Dynamic.
// On macOS, buffers flagged as managed additionally get the modified byte
// range reported via didModifyRange.
void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot)
{
    auto &queuedWrites = bufD->d->pendingUpdates[slot];
    if (queuedWrites.isEmpty())
        return;

    char *contents = static_cast<char *>([bufD->d->buf[slot] contents]);

    // Track the union [changeBegin, changeEnd) of all written ranges.
    quint32 changeBegin = UINT32_MAX;
    quint32 changeEnd = 0;
    for (const QMetalBufferData::BufferUpdate &write : std::as_const(queuedWrites)) {
        memcpy(contents + write.offset, write.data.constData(), size_t(write.data.size()));
        if (write.offset < changeBegin)
            changeBegin = write.offset;
        const auto writeEnd = write.offset + write.data.size();
        if (writeEnd > changeEnd)
            changeEnd = writeEnd;
    }
#ifdef Q_OS_MACOS
    const bool hasValidRange = changeBegin < UINT32_MAX && changeBegin < changeEnd;
    if (hasValidRange && bufD->d->managed) {
        const NSRange modified = NSMakeRange(NSUInteger(changeBegin), NSUInteger(changeEnd - changeBegin));
        [bufD->d->buf[slot] didModifyRange: modified];
    }
#endif

    queuedWrites.clear();
}
2999
// Flushes the queued host writes for the slot that is relevant right now:
// the current frame slot for slotted buffers, slot 0 otherwise.
void QRhiMetal::executeBufferHostWritesForCurrentFrame(QMetalBuffer *bufD)
{
    int slot = 0;
    if (bufD->d->slotted)
        slot = currentFrameSlot;
    executeBufferHostWritesForSlot(bufD, slot);
}
3004
// Enqueues the given resource updates on the command buffer. Must be called
// while no pass is being recorded.
void QRhiMetal::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
{
    Q_ASSERT(QRHI_RES(QMetalCommandBuffer, cb)->recordingPass == QMetalCommandBuffer::NoPass);
    enqueueResourceUpdates(cb, resourceUpdates);
}
3011
// Starts recording a render pass targeting rt.
//
// Optional resource updates are enqueued first. A default render pass
// descriptor is then created and adjusted for the render target type
// (swapchain or texture render target), the attachment textures are filled
// in, and a render command encoder is created from it.
//
// For swapchain targets a drawable is acquired if the color attachment still
// needs one; when none is available a warning is printed and the function
// returns without starting the pass.
void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
                          QRhiRenderTarget *rt,
                          const QColor &colorClearValue,
                          const QRhiDepthStencilClearValue &depthStencilClearValue,
                          QRhiResourceUpdateBatch *resourceUpdates,
                          QRhiCommandBuffer::BeginPassFlags)
{
    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::NoPass);

    if (resourceUpdates)
        enqueueResourceUpdates(cb, resourceUpdates);

    QMetalRenderTargetData *rtD = nullptr;
    // Local alias of cbD->d->currentPassRpDesc, set in each case below.
    MTLRenderPassDescriptor *rpDesc = nil;

    switch (rt->resourceType()) {
    case QRhiResource::SwapChainRenderTarget:
    {
        QMetalSwapChainRenderTarget *rtSc = QRHI_RES(QMetalSwapChainRenderTarget, rt);
        rtD = rtSc->d;
        QRhiShadingRateMap *shadingRateMap = rtSc->swapChain()->shadingRateMap();
        rpDesc = d->createDefaultRenderPass(rtD->dsAttCount,
                                            colorClearValue,
                                            depthStencilClearValue,
                                            rtD->colorAttCount,
                                            shadingRateMap);
        cbD->d->currentPassRpDesc = rpDesc;

        if (rtD->colorAttCount) {
            QMetalRenderTargetData::ColorAtt &color0(rtD->fb.colorAtt[0]);
            if (color0.needsDrawableForTex || color0.needsDrawableForResolveTex) {
                Q_ASSERT(currentSwapChain);
                QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, currentSwapChain);
                if (!swapChainD->d->curDrawable) {
                    // Retained explicitly so it outlives the local pool.
                    QMacAutoReleasePool pool;
                    swapChainD->d->curDrawable = [[swapChainD->d->layer nextDrawable] retain];
                }
                if (!swapChainD->d->curDrawable) {
                    qWarning("No drawable");
                    return;
                }
                id<MTLTexture> drawableTex = swapChainD->d->curDrawable.texture;
                // The drawable's texture is either the render target itself
                // or the resolve target.
                if (color0.needsDrawableForTex) {
                    color0.tex = drawableTex;
                    color0.needsDrawableForTex = false;
                } else {
                    color0.resolveTex = drawableTex;
                    color0.needsDrawableForResolveTex = false;
                }
            }
        }

        if (shadingRateMap)
            QRHI_RES(QMetalShadingRateMap, shadingRateMap)->lastActiveFrameSlot = currentFrameSlot;
    }
        break;
    case QRhiResource::TextureRenderTarget:
    {
        QMetalTextureRenderTarget *rtTex = QRHI_RES(QMetalTextureRenderTarget, rt);
        rtD = rtTex->d;
        // Rebuild the render target if the attachment tracker reports it
        // as out of date.
        if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(rtTex->description(), rtD->currentResIdList))
            rtTex->create();

        rpDesc = d->createDefaultRenderPass(rtD->dsAttCount,
                                            colorClearValue,
                                            depthStencilClearValue,
                                            rtD->colorAttCount,
                                            rtTex->m_desc.shadingRateMap());
        cbD->d->currentPassRpDesc = rpDesc;

        // Preserve flags turn the default Clear load action into Load.
        if (rtD->fb.preserveColor) {
            const uint colorCount = uint(rtD->colorAttCount);
            for (uint attIdx = 0; attIdx < colorCount; ++attIdx)
                rpDesc.colorAttachments[attIdx].loadAction = MTLLoadActionLoad;
        }
        if (rtD->dsAttCount && rtD->fb.preserveDs) {
            rpDesc.depthAttachment.loadAction = MTLLoadActionLoad;
            rpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
        }

        // Mark every attached resource as used in the current frame slot.
        int describedColorAttCount = 0;
        const auto attEnd = rtTex->m_desc.cendColorAttachments();
        for (auto att = rtTex->m_desc.cbeginColorAttachments(); att != attEnd; ++att) {
            describedColorAttCount += 1;
            if (att->texture()) {
                QRHI_RES(QMetalTexture, att->texture())->lastActiveFrameSlot = currentFrameSlot;
                if (att->multiViewCount() >= 2)
                    rpDesc.renderTargetArrayLength = NSUInteger(att->multiViewCount());
            } else if (att->renderBuffer()) {
                QRHI_RES(QMetalRenderBuffer, att->renderBuffer())->lastActiveFrameSlot = currentFrameSlot;
            }
            if (att->resolveTexture())
                QRHI_RES(QMetalTexture, att->resolveTexture())->lastActiveFrameSlot = currentFrameSlot;
        }
        if (rtTex->m_desc.depthStencilBuffer())
            QRHI_RES(QMetalRenderBuffer, rtTex->m_desc.depthStencilBuffer())->lastActiveFrameSlot = currentFrameSlot;
        if (rtTex->m_desc.depthTexture()) {
            QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture());
            depthTexD->lastActiveFrameSlot = currentFrameSlot;
            // Depth-only pass rendering into a texture array.
            if (describedColorAttCount == 0 && depthTexD->arraySize() >= 2)
                rpDesc.renderTargetArrayLength = NSUInteger(depthTexD->arraySize());
        }
        if (rtTex->m_desc.depthResolveTexture())
            QRHI_RES(QMetalTexture, rtTex->m_desc.depthResolveTexture())->lastActiveFrameSlot = currentFrameSlot;
        if (rtTex->m_desc.shadingRateMap())
            QRHI_RES(QMetalShadingRateMap, rtTex->m_desc.shadingRateMap())->lastActiveFrameSlot = currentFrameSlot;
    }
        break;
    default:
        Q_UNREACHABLE();
        break;
    }

    // Fill in the color attachments from the render target data.
    const uint colorCount = uint(rtD->colorAttCount);
    for (uint attIdx = 0; attIdx < colorCount; ++attIdx) {
        const QMetalRenderTargetData::ColorAtt &srcAtt(rtD->fb.colorAtt[attIdx]);
        MTLRenderPassColorAttachmentDescriptor *dstAtt = rpDesc.colorAttachments[attIdx];
        dstAtt.texture = srcAtt.tex;
        dstAtt.slice = NSUInteger(srcAtt.arrayLayer);
        dstAtt.depthPlane = NSUInteger(srcAtt.slice);
        dstAtt.level = NSUInteger(srcAtt.level);
        if (srcAtt.resolveTex) {
            if (rtD->fb.preserveColor)
                dstAtt.storeAction = MTLStoreActionStoreAndMultisampleResolve;
            else
                dstAtt.storeAction = MTLStoreActionMultisampleResolve;
            dstAtt.resolveTexture = srcAtt.resolveTex;
            dstAtt.resolveSlice = NSUInteger(srcAtt.resolveLayer);
            dstAtt.resolveLevel = NSUInteger(srcAtt.resolveLevel);
        }
    }

    if (rtD->dsAttCount) {
        Q_ASSERT(rtD->fb.dsTex);
        rpDesc.depthAttachment.texture = rtD->fb.dsTex;
        rpDesc.stencilAttachment.texture = rtD->fb.hasStencil ? rtD->fb.dsTex : nil;
        // Depth/Stencil is set to DontCare by default, override if needed
        if (rtD->fb.depthNeedsStore)
            rpDesc.depthAttachment.storeAction = MTLStoreActionStore;
        if (rtD->fb.dsResolveTex) {
            if (rtD->fb.depthNeedsStore)
                rpDesc.depthAttachment.storeAction = MTLStoreActionStoreAndMultisampleResolve;
            else
                rpDesc.depthAttachment.storeAction = MTLStoreActionMultisampleResolve;
            rpDesc.depthAttachment.resolveTexture = rtD->fb.dsResolveTex;
            if (rtD->fb.hasStencil) {
                rpDesc.stencilAttachment.resolveTexture = rtD->fb.dsResolveTex;
                rpDesc.stencilAttachment.storeAction = rpDesc.depthAttachment.storeAction;
            }
        }
    }

    cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: rpDesc];

    cbD->resetPerPassState();

    cbD->recordingPass = QMetalCommandBuffer::RenderPass;
    cbD->currentTarget = rt;
}
3155
// Ends the render pass being recorded: finishes the render command encoder,
// resets the pass state on the command buffer, and then enqueues the
// optional resource updates.
void QRhiMetal::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
{
    QMetalCommandBuffer *cmdBufD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cmdBufD->recordingPass == QMetalCommandBuffer::RenderPass);

    [cmdBufD->d->currentRenderPassEncoder endEncoding];

    cmdBufD->currentTarget = nullptr;
    cmdBufD->recordingPass = QMetalCommandBuffer::NoPass;

    if (!resourceUpdates)
        return;
    enqueueResourceUpdates(cb, resourceUpdates);
}
3169
// Starts recording a compute pass: enqueues the optional resource updates,
// creates a compute command encoder on the command buffer and resets the
// per-pass state.
void QRhiMetal::beginComputePass(QRhiCommandBuffer *cb,
                                 QRhiResourceUpdateBatch *resourceUpdates,
                                 QRhiCommandBuffer::BeginPassFlags)
{
    QMetalCommandBuffer *cmdBufD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cmdBufD->recordingPass == QMetalCommandBuffer::NoPass);

    if (resourceUpdates != nullptr)
        enqueueResourceUpdates(cb, resourceUpdates);

    id<MTLComputeCommandEncoder> computeEnc = [cmdBufD->d->cb computeCommandEncoder];
    cmdBufD->d->currentComputePassEncoder = computeEnc;

    cmdBufD->resetPerPassState();
    cmdBufD->recordingPass = QMetalCommandBuffer::ComputePass;
}
3184
// Ends the compute pass being recorded: finishes the compute command
// encoder, marks the command buffer as not recording a pass, and then
// enqueues the optional resource updates.
void QRhiMetal::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates)
{
    QMetalCommandBuffer *cmdBufD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cmdBufD->recordingPass == QMetalCommandBuffer::ComputePass);

    [cmdBufD->d->currentComputePassEncoder endEncoding];
    cmdBufD->recordingPass = QMetalCommandBuffer::NoPass;

    if (!resourceUpdates)
        return;
    enqueueResourceUpdates(cb, resourceUpdates);
}
3196
// Binds the compute pipeline on the current compute encoder. The native
// pipeline state is only set again when the pipeline object or its
// generation differs from what the command buffer last recorded. The
// pipeline is always marked as used in the current frame slot.
void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps)
{
    QMetalCommandBuffer *cmdBufD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cmdBufD->recordingPass == QMetalCommandBuffer::ComputePass);
    QMetalComputePipeline *pipelineD = QRHI_RES(QMetalComputePipeline, ps);

    const bool needsRebind = cmdBufD->currentComputePipeline != pipelineD
            || cmdBufD->currentPipelineGeneration != pipelineD->generation;
    if (needsRebind) {
        cmdBufD->currentGraphicsPipeline = nullptr;
        cmdBufD->currentComputePipeline = pipelineD;
        cmdBufD->currentPipelineGeneration = pipelineD->generation;

        [cmdBufD->d->currentComputePassEncoder setComputePipelineState: pipelineD->d->ps];
    }

    pipelineD->lastActiveFrameSlot = currentFrameSlot;
}
3213
// Records a dispatch of x * y * z threadgroups on the current compute
// encoder, using the local size stored with the currently bound compute
// pipeline as the threads-per-threadgroup value.
void QRhiMetal::dispatch(QRhiCommandBuffer *cb, int x, int y, int z)
{
    QMetalCommandBuffer *cmdBufD = QRHI_RES(QMetalCommandBuffer, cb);
    Q_ASSERT(cmdBufD->recordingPass == QMetalCommandBuffer::ComputePass);
    QMetalComputePipeline *pipelineD = QRHI_RES(QMetalComputePipeline, cmdBufD->currentComputePipeline);

    const MTLSize groupCount = MTLSizeMake(NSUInteger(x), NSUInteger(y), NSUInteger(z));
    [cmdBufD->d->currentComputePassEncoder dispatchThreadgroups: groupCount
                                          threadsPerThreadgroup: pipelineD->d->localSize];
}
3223
// Releases the native buffer of every frame slot stored in the entry.
// Unused slots hold nil, for which release is a no-op.
static void qrhimtl_releaseBuffer(const QRhiMetalData::DeferredReleaseEntry &e)
{
    int slot = 0;
    while (slot < QMTL_FRAMES_IN_FLIGHT) {
        [e.buffer.buffers[slot] release];
        ++slot;
    }
}
3229
// Releases the native texture backing a renderbuffer release entry.
static void qrhimtl_releaseRenderBuffer(const QRhiMetalData::DeferredReleaseEntry &e)
{
    id<MTLTexture> backingTex = e.renderbuffer.texture;
    [backingTex release];
}
3234
// Releases everything a texture release entry carries: the native texture,
// the per-frame-slot staging buffers and the per-mip-level views.
static void qrhimtl_releaseTexture(const QRhiMetalData::DeferredReleaseEntry &e)
{
    [e.texture.texture release];

    for (int slot = 0; slot != QMTL_FRAMES_IN_FLIGHT; ++slot)
        [e.texture.stagingBuffers[slot] release];

    for (int mip = 0; mip != QRhi::MAX_MIP_LEVELS; ++mip)
        [e.texture.views[mip] release];
}
3243
// Releases the native sampler state stored in a sampler release entry.
static void qrhimtl_releaseSampler(const QRhiMetalData::DeferredReleaseEntry &e)
{
    id<MTLSamplerState> nativeSampler = e.sampler.samplerState;
    [nativeSampler release];
}
3248
// Walks the deferred release queue back to front and releases the native
// objects of every entry that is due. An entry is due when 'forced' is set,
// when its last active frame slot equals the current frame slot, or when it
// was never marked active (negative slot). Released entries are removed
// from the queue.
void QRhiMetal::executeDeferredReleases(bool forced)
{
    using Entry = QRhiMetalData::DeferredReleaseEntry;

    // Iterating backwards keeps the remaining indices valid after removeAt().
    for (int idx = int(d->releaseQueue.count()) - 1; idx >= 0; --idx) {
        const Entry &entry(d->releaseQueue[idx]);

        const bool due = forced
                || currentFrameSlot == entry.lastActiveFrameSlot
                || entry.lastActiveFrameSlot < 0;
        if (!due)
            continue;

        switch (entry.type) {
        case Entry::Buffer:
            qrhimtl_releaseBuffer(entry);
            break;
        case Entry::RenderBuffer:
            qrhimtl_releaseRenderBuffer(entry);
            break;
        case Entry::Texture:
            qrhimtl_releaseTexture(entry);
            break;
        case Entry::Sampler:
            qrhimtl_releaseSampler(entry);
            break;
        case Entry::StagingBuffer:
            [entry.stagingBuffer.buffer release];
            break;
        case Entry::GraphicsPipeline:
            [entry.graphicsPipeline.pipelineState release];
            [entry.graphicsPipeline.depthStencilState release];
            for (int variant = 0; variant < 3; ++variant)
                [entry.graphicsPipeline.tessVertexComputeState[variant] release];
            [entry.graphicsPipeline.tessTessControlComputeState release];
            break;
        case Entry::ComputePipeline:
            [entry.computePipeline.pipelineState release];
            break;
        case Entry::ShadingRateMap:
            [entry.shadingRateMap.rateMap release];
            break;
        default:
            break;
        }

        d->releaseQueue.removeAt(idx);
    }
}
3291
// Completes the pending texture and buffer readbacks that are due. A
// readback is due when 'forced' is set, when its frame slot equals the
// current frame slot, or when its slot is negative.
//
// The data is copied into the result objects first; the completion
// callbacks are collected and only invoked after both lists have been
// processed, so a callback never runs while the lists are being modified
// here.
void QRhiMetal::finishActiveReadbacks(bool forced)
{
    QVarLengthArray<std::function<void()>, 4> completedCallbacks;

    for (int i = d->activeTextureReadbacks.count() - 1; i >= 0; --i) {
        const QRhiMetalData::TextureReadback &readback(d->activeTextureReadbacks[i]);
        if (forced || currentFrameSlot == readback.activeFrameSlot || readback.activeFrameSlot < 0) {
            readback.result->format = readback.format;
            readback.result->pixelSize = readback.pixelSize;
            // The readback buffer allocation is not checked where it is
            // created, so contents can be null here (messaging a nil buffer
            // yields null). Only touch the result data when there is
            // something to copy, like the Dynamic buffer read path does.
            void *p = [readback.buf contents];
            if (p) {
                readback.result->data.resize(int(readback.bufSize));
                memcpy(readback.result->data.data(), p, readback.bufSize);
            }
            // The texture readback buffer was created for this readback only.
            [readback.buf release];

            if (readback.result->completed)
                completedCallbacks.append(readback.result->completed);

            d->activeTextureReadbacks.remove(i);
        }
    }

    for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
        const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
        if (forced || currentFrameSlot == readback.activeFrameSlot
                || readback.activeFrameSlot < 0) {
            readback.result->data.resize(readback.readSize);
            // readback.buf is the buffer object being read, so it is not
            // released here.
            char *p = reinterpret_cast<char *>([readback.buf contents]);
            Q_ASSERT(p);
            memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));

            if (readback.result->completed)
                completedCallbacks.append(readback.result->completed);

            d->activeBufferReadbacks.remove(i);
        }
    }

    // The callbacks live in the local array, so invoking them by reference
    // is safe and avoids copying each std::function once more.
    for (const std::function<void()> &callback : completedCallbacks)
        callback();
}
3332
// Constructs the buffer wrapper with freshly allocated private data. No
// native buffer exists until create() is called; all slots start out nil.
QMetalBuffer::QMetalBuffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size)
    : QRhiBuffer(rhi, type, usage, size),
      d(new QMetalBufferData)
{
    int slot = QMTL_FRAMES_IN_FLIGHT;
    while (slot-- > 0)
        d->buf[slot] = nil;
}
3340
// Queues the native buffers for release (via destroy()) and then frees the
// private data.
QMetalBuffer::~QMetalBuffer()
{
    destroy();

    delete d;
}
3346
// Detaches the native buffers from this object and hands them to the
// deferred release queue of the QRhi, tagged with the last frame slot in
// which the buffer was active. Pending host writes are dropped. Does
// nothing when no native buffer exists.
void QMetalBuffer::destroy()
{
    if (d->buf[0] == nil)
        return;

    QRhiMetalData::DeferredReleaseEntry releaseEntry;
    releaseEntry.type = QRhiMetalData::DeferredReleaseEntry::Buffer;
    releaseEntry.lastActiveFrameSlot = lastActiveFrameSlot;

    for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot) {
        releaseEntry.buffer.buffers[slot] = d->buf[slot];
        d->buf[slot] = nil;
        d->pendingUpdates[slot].clear();
    }

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(releaseEntry);
    rhiD->unregisterResource(this);
}
3368
// (Re)creates the native Metal buffer(s) for this QRhiBuffer.
//
// Any existing native buffers are queued for release first. Returns false
// when StorageBuffer usage is combined with the Dynamic type, or when a
// native buffer cannot be allocated; in the latter case buffers allocated
// so far by this call are released again and the object is left without
// native resources. On success the resource is registered with the QRhi and
// the generation counter is incremented.
bool QMetalBuffer::create()
{
    if (d->buf[0])
        destroy();

    if (m_usage.testFlag(QRhiBuffer::StorageBuffer) && m_type == Dynamic) {
        qWarning("StorageBuffer cannot be combined with Dynamic");
        return false;
    }

    // Never create a zero-sized buffer; uniform buffer sizes are rounded up
    // to a multiple of 256 bytes.
    const quint32 nonZeroSize = m_size <= 0 ? 256 : m_size;
    const quint32 roundedSize = m_usage.testFlag(QRhiBuffer::UniformBuffer) ? aligned(nonZeroSize, 256u) : nonZeroSize;

    d->managed = false;
    MTLResourceOptions opts = MTLResourceStorageModeShared;

    QRHI_RES_RHI(QRhiMetal);
#ifdef Q_OS_MACOS
    if (!rhiD->caps.isAppleGPU && m_type != Dynamic) {
        opts = MTLResourceStorageModeManaged;
        d->managed = true;
    }
#endif

    // Have QMTL_FRAMES_IN_FLIGHT versions regardless of the type, for now.
    // This is because writing to a Managed buffer (which is what Immutable and
    // Static maps to on macOS) is not safe when another frame reading from the
    // same buffer is still in flight.
    d->slotted = !m_usage.testFlag(QRhiBuffer::StorageBuffer); // except for SSBOs written in the shader
    // and a special case for internal work buffers
    if (int(m_usage) == WorkBufPoolUsage)
        d->slotted = false;

    for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
        if (i == 0 || d->slotted) {
            d->buf[i] = [rhiD->d->dev newBufferWithLength: roundedSize options: opts];
            if (!d->buf[i]) {
                // Allocation failed. The buffers created so far by this call
                // have not been used by anything yet, so they can be released
                // right away instead of going through the release queue.
                qWarning("Failed to create buffer of size %u", roundedSize);
                for (int j = 0; j < i; ++j) {
                    [d->buf[j] release];
                    d->buf[j] = nil;
                }
                return false;
            }
            if (!m_objectName.isEmpty()) {
                if (!d->slotted) {
                    d->buf[i].label = [NSString stringWithUTF8String: m_objectName.constData()];
                } else {
                    // Slotted buffers get "<name>/<slot>" as their label.
                    const QByteArray name = m_objectName + '/' + QByteArray::number(i);
                    d->buf[i].label = [NSString stringWithUTF8String: name.constData()];
                }
            }
        }
    }

    lastActiveFrameSlot = -1;
    generation += 1;
    rhiD->registerResource(this);
    return true;
}
3421
// Exposes pointers to the underlying native buffer objects. Slotted buffers
// report QMTL_FRAMES_IN_FLIGHT objects, with the pending host writes of each
// slot flushed first; non-slotted buffers report just the object in slot 0.
QRhiBuffer::NativeBuffer QMetalBuffer::nativeBuffer()
{
    if (!d->slotted)
        return { { &d->buf[0] }, 1 };

    NativeBuffer result;
    Q_ASSERT(sizeof(result.objects) / sizeof(result.objects[0]) >= size_t(QMTL_FRAMES_IN_FLIGHT));

    QRHI_RES_RHI(QRhiMetal);
    for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot) {
        rhiD->executeBufferHostWritesForSlot(this, slot);
        result.objects[slot] = &d->buf[slot];
    }
    result.slotCount = QMTL_FRAMES_IN_FLIGHT;
    return result;
}
3437
// Fast path for Dynamic buffers whose entire content changes every frame:
// returns a pointer to the contents of the native buffer belonging to the
// current frame slot, so that the client can write to it directly and
// bypass the regular update machinery. Mixing this with
// QRhiResourceUpdateBatch-based updates of the same buffer will lead to
// unexpected results. Only valid for Dynamic buffers, while a frame is
// being recorded.
char *QMetalBuffer::beginFullDynamicBufferUpdateForCurrentFrame()
{
    Q_ASSERT(m_type == Dynamic);
    QRHI_RES_RHI(QRhiMetal);
    Q_ASSERT(rhiD->inFrame);

    id<MTLBuffer> slotBuf = d->buf[rhiD->currentFrameSlot];
    return static_cast<char *>([slotBuf contents]);
}
3452
// Counterpart of beginFullDynamicBufferUpdateForCurrentFrame(). On macOS,
// when the buffer is flagged as managed, the whole range [0, m_size) of the
// current frame slot's buffer is reported as modified. Does nothing
// otherwise.
void QMetalBuffer::endFullDynamicBufferUpdateForCurrentFrame()
{
#ifdef Q_OS_MACOS
    if (!d->managed)
        return;

    QRHI_RES_RHI(QRhiMetal);
    id<MTLBuffer> slotBuf = d->buf[rhiD->currentFrameSlot];
    [slotBuf didModifyRange: NSMakeRange(0, NSUInteger(m_size))];
#endif
}
3463
// Maps a QRhiTexture format, together with the sRGB flag, to the Metal pixel
// format used to back it. For the formats this function reports as
// unsupported on the current platform/GPU, a warning is printed and
// MTLPixelFormatInvalid is returned. \a d is only consulted on macOS.
static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags, const QRhiMetal *d)
{
#ifndef Q_OS_MACOS
    Q_UNUSED(d);
#endif

    const bool srgb = flags.testFlag(QRhiTexture::sRGB);
    // Chooses between the sRGB and the linear variant of a format pair.
    const auto pick = [srgb](MTLPixelFormat srgbFormat, MTLPixelFormat linearFormat) -> MTLPixelFormat {
        return srgb ? srgbFormat : linearFormat;
    };

#ifdef Q_OS_MACOS
    // On macOS the ETC2 and ASTC families are only handed out when the device
    // is flagged as an Apple GPU. Reject them up front otherwise, so that the
    // main switch below can share these cases with the other platforms.
    if (!d->caps.isAppleGPU) {
        switch (format) {
        case QRhiTexture::ETC2_RGB8:
        case QRhiTexture::ETC2_RGB8A1:
        case QRhiTexture::ETC2_RGBA8:
            qWarning("QRhiMetal: ETC2 compression not supported on this platform");
            return MTLPixelFormatInvalid;
        case QRhiTexture::ASTC_4x4:
        case QRhiTexture::ASTC_5x4:
        case QRhiTexture::ASTC_5x5:
        case QRhiTexture::ASTC_6x5:
        case QRhiTexture::ASTC_6x6:
        case QRhiTexture::ASTC_8x5:
        case QRhiTexture::ASTC_8x6:
        case QRhiTexture::ASTC_8x8:
        case QRhiTexture::ASTC_10x5:
        case QRhiTexture::ASTC_10x6:
        case QRhiTexture::ASTC_10x8:
        case QRhiTexture::ASTC_10x10:
        case QRhiTexture::ASTC_12x10:
        case QRhiTexture::ASTC_12x12:
            qWarning("QRhiMetal: ASTC compression not supported on this platform");
            return MTLPixelFormatInvalid;
        default:
            break;
        }
    }
#endif

    switch (format) {
    // 8 and 16 bit normalized / integer formats
    case QRhiTexture::RGBA8:
        return pick(MTLPixelFormatRGBA8Unorm_sRGB, MTLPixelFormatRGBA8Unorm);
    case QRhiTexture::BGRA8:
        return pick(MTLPixelFormatBGRA8Unorm_sRGB, MTLPixelFormatBGRA8Unorm);
    case QRhiTexture::R8:
#ifdef Q_OS_MACOS
        return MTLPixelFormatR8Unorm;
#else
        return pick(MTLPixelFormatR8Unorm_sRGB, MTLPixelFormatR8Unorm);
#endif
    case QRhiTexture::R8SI:
        return MTLPixelFormatR8Sint;
    case QRhiTexture::R8UI:
        return MTLPixelFormatR8Uint;
    case QRhiTexture::RG8:
#ifdef Q_OS_MACOS
        return MTLPixelFormatRG8Unorm;
#else
        return pick(MTLPixelFormatRG8Unorm_sRGB, MTLPixelFormatRG8Unorm);
#endif
    case QRhiTexture::R16:
        return MTLPixelFormatR16Unorm;
    case QRhiTexture::RG16:
        return MTLPixelFormatRG16Unorm;
    case QRhiTexture::RED_OR_ALPHA8:
        return MTLPixelFormatR8Unorm;

    // floating point formats
    case QRhiTexture::RGBA16F:
        return MTLPixelFormatRGBA16Float;
    case QRhiTexture::RGBA32F:
        return MTLPixelFormatRGBA32Float;
    case QRhiTexture::R16F:
        return MTLPixelFormatR16Float;
    case QRhiTexture::R32F:
        return MTLPixelFormatR32Float;

    case QRhiTexture::RGB10A2:
        return MTLPixelFormatRGB10A2Unorm;

    // 32 bit integer formats
    case QRhiTexture::R32SI:
        return MTLPixelFormatR32Sint;
    case QRhiTexture::R32UI:
        return MTLPixelFormatR32Uint;
    case QRhiTexture::RG32SI:
        return MTLPixelFormatRG32Sint;
    case QRhiTexture::RG32UI:
        return MTLPixelFormatRG32Uint;
    case QRhiTexture::RGBA32SI:
        return MTLPixelFormatRGBA32Sint;
    case QRhiTexture::RGBA32UI:
        return MTLPixelFormatRGBA32Uint;

    // depth / stencil formats
#ifdef Q_OS_MACOS
    case QRhiTexture::D16:
        return MTLPixelFormatDepth16Unorm;
    case QRhiTexture::D24:
        if ([d->d->dev isDepth24Stencil8PixelFormatSupported])
            return MTLPixelFormatDepth24Unorm_Stencil8;
        return MTLPixelFormatDepth32Float;
    case QRhiTexture::D24S8:
        if ([d->d->dev isDepth24Stencil8PixelFormatSupported])
            return MTLPixelFormatDepth24Unorm_Stencil8;
        return MTLPixelFormatDepth32Float_Stencil8;
#else
    case QRhiTexture::D16:
    case QRhiTexture::D24:
        return MTLPixelFormatDepth32Float;
    case QRhiTexture::D24S8:
        return MTLPixelFormatDepth32Float_Stencil8;
#endif
    case QRhiTexture::D32F:
        return MTLPixelFormatDepth32Float;
    case QRhiTexture::D32FS8:
        return MTLPixelFormatDepth32Float_Stencil8;

    // BCx block compression: macOS only
#ifdef Q_OS_MACOS
    case QRhiTexture::BC1:
        return pick(MTLPixelFormatBC1_RGBA_sRGB, MTLPixelFormatBC1_RGBA);
    case QRhiTexture::BC2:
        return pick(MTLPixelFormatBC2_RGBA_sRGB, MTLPixelFormatBC2_RGBA);
    case QRhiTexture::BC3:
        return pick(MTLPixelFormatBC3_RGBA_sRGB, MTLPixelFormatBC3_RGBA);
    case QRhiTexture::BC4:
        return MTLPixelFormatBC4_RUnorm;
    case QRhiTexture::BC5:
        qWarning("QRhiMetal does not support BC5");
        return MTLPixelFormatInvalid;
    case QRhiTexture::BC6H:
        return MTLPixelFormatBC6H_RGBUfloat;
    case QRhiTexture::BC7:
        return pick(MTLPixelFormatBC7_RGBAUnorm_sRGB, MTLPixelFormatBC7_RGBAUnorm);
#else
    case QRhiTexture::BC1:
    case QRhiTexture::BC2:
    case QRhiTexture::BC3:
    case QRhiTexture::BC4:
    case QRhiTexture::BC5:
    case QRhiTexture::BC6H:
    case QRhiTexture::BC7:
        qWarning("QRhiMetal: BCx compression not supported on this platform");
        return MTLPixelFormatInvalid;
#endif

    // ETC2 (on macOS only reached when caps.isAppleGPU is set, see above)
    case QRhiTexture::ETC2_RGB8:
        return pick(MTLPixelFormatETC2_RGB8_sRGB, MTLPixelFormatETC2_RGB8);
    case QRhiTexture::ETC2_RGB8A1:
        return pick(MTLPixelFormatETC2_RGB8A1_sRGB, MTLPixelFormatETC2_RGB8A1);
    case QRhiTexture::ETC2_RGBA8:
        return pick(MTLPixelFormatEAC_RGBA8_sRGB, MTLPixelFormatEAC_RGBA8);

    // ASTC (on macOS only reached when caps.isAppleGPU is set, see above)
    case QRhiTexture::ASTC_4x4:
        return pick(MTLPixelFormatASTC_4x4_sRGB, MTLPixelFormatASTC_4x4_LDR);
    case QRhiTexture::ASTC_5x4:
        return pick(MTLPixelFormatASTC_5x4_sRGB, MTLPixelFormatASTC_5x4_LDR);
    case QRhiTexture::ASTC_5x5:
        return pick(MTLPixelFormatASTC_5x5_sRGB, MTLPixelFormatASTC_5x5_LDR);
    case QRhiTexture::ASTC_6x5:
        return pick(MTLPixelFormatASTC_6x5_sRGB, MTLPixelFormatASTC_6x5_LDR);
    case QRhiTexture::ASTC_6x6:
        return pick(MTLPixelFormatASTC_6x6_sRGB, MTLPixelFormatASTC_6x6_LDR);
    case QRhiTexture::ASTC_8x5:
        return pick(MTLPixelFormatASTC_8x5_sRGB, MTLPixelFormatASTC_8x5_LDR);
    case QRhiTexture::ASTC_8x6:
        return pick(MTLPixelFormatASTC_8x6_sRGB, MTLPixelFormatASTC_8x6_LDR);
    case QRhiTexture::ASTC_8x8:
        return pick(MTLPixelFormatASTC_8x8_sRGB, MTLPixelFormatASTC_8x8_LDR);
    case QRhiTexture::ASTC_10x5:
        return pick(MTLPixelFormatASTC_10x5_sRGB, MTLPixelFormatASTC_10x5_LDR);
    case QRhiTexture::ASTC_10x6:
        return pick(MTLPixelFormatASTC_10x6_sRGB, MTLPixelFormatASTC_10x6_LDR);
    case QRhiTexture::ASTC_10x8:
        return pick(MTLPixelFormatASTC_10x8_sRGB, MTLPixelFormatASTC_10x8_LDR);
    case QRhiTexture::ASTC_10x10:
        return pick(MTLPixelFormatASTC_10x10_sRGB, MTLPixelFormatASTC_10x10_LDR);
    case QRhiTexture::ASTC_12x10:
        return pick(MTLPixelFormatASTC_12x10_sRGB, MTLPixelFormatASTC_12x10_LDR);
    case QRhiTexture::ASTC_12x12:
        return pick(MTLPixelFormatASTC_12x12_sRGB, MTLPixelFormatASTC_12x12_LDR);

    default:
        Q_UNREACHABLE();
        return MTLPixelFormatInvalid;
    }
}
3701
// Forwards all settings to QRhiRenderBuffer and allocates the private data
// object. No Metal resource is created here; that happens in create().
QMetalRenderBuffer::QMetalRenderBuffer(QRhiImplementation *rhi,
                                       Type type,
                                       const QSize &pixelSize,
                                       int sampleCount,
                                       QRhiRenderBuffer::Flags flags,
                                       QRhiTexture::Format backingFormatHint)
    : QRhiRenderBuffer(rhi, type, pixelSize, sampleCount, flags, backingFormatHint),
      d(new QMetalRenderBufferData)
{
}
3709
// Hands any live texture over via destroy(), then frees the private data.
QMetalRenderBuffer::~QMetalRenderBuffer()
{
    destroy();
    delete d;
}
3715
// Detaches the backing texture from this object and, when the QRhi is still
// around, queues it on the backend's release queue (tagged with the last
// frame slot that used it) and unregisters the resource. Does nothing when
// there is no texture.
void QMetalRenderBuffer::destroy()
{
    if (!d->tex)
        return;

    QRhiMetalData::DeferredReleaseEntry entry;
    entry.type = QRhiMetalData::DeferredReleaseEntry::RenderBuffer;
    entry.lastActiveFrameSlot = lastActiveFrameSlot;
    entry.renderbuffer.texture = d->tex;

    d->tex = nil;

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(entry);
    rhiD->unregisterResource(this);
}
3734
// Creates the texture that backs this renderbuffer, releasing any previous
// one first.
//
// Returns false when the pixel size is empty, when the backing format hint
// maps to no usable Metal pixel format, or when the device fails to create
// the texture. On success the generation counter is bumped and the resource
// is registered with the QRhi.
bool QMetalRenderBuffer::create()
{
    if (d->tex)
        destroy();

    if (m_pixelSize.isEmpty())
        return false;

    QRHI_RES_RHI(QRhiMetal);
    samples = rhiD->effectiveSampleCount(m_sampleCount);

    // Decide on the storage mode and pixel format before touching any Metal
    // object, so that an unusable format can be rejected without cleanup.
    MTLStorageMode storageMode = MTLStorageModePrivate;
    switch (m_type) {
    case DepthStencil:
#ifdef Q_OS_MACOS
        if (rhiD->caps.isAppleGPU) {
            storageMode = MTLStorageModeMemoryless;
            d->format = MTLPixelFormatDepth32Float_Stencil8;
        } else {
            d->format = rhiD->d->dev.depth24Stencil8PixelFormatSupported
                    ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8;
        }
#else
        storageMode = MTLStorageModeMemoryless;
        d->format = MTLPixelFormatDepth32Float_Stencil8;
#endif
        break;
    case Color:
        if (m_backingFormatHint != QRhiTexture::UnknownFormat)
            d->format = toMetalTextureFormat(m_backingFormatHint, {}, rhiD);
        else
            d->format = MTLPixelFormatRGBA8Unorm;
        break;
    default:
        Q_UNREACHABLE();
        break;
    }

    // toMetalTextureFormat() reports unsupported formats as Invalid; a
    // descriptor with such a format must not be handed to the device.
    if (d->format == MTLPixelFormatInvalid) {
        qWarning("QRhiMetal: Unsupported backing format for renderbuffer");
        return false;
    }

    MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
    desc.textureType = samples > 1 ? MTLTextureType2DMultisample : MTLTextureType2D;
    desc.width = NSUInteger(m_pixelSize.width());
    desc.height = NSUInteger(m_pixelSize.height());
    if (samples > 1)
        desc.sampleCount = NSUInteger(samples);
    desc.resourceOptions = MTLResourceStorageModePrivate;
    desc.usage = MTLTextureUsageRenderTarget;
    desc.storageMode = storageMode;
    desc.pixelFormat = d->format;

    d->tex = [rhiD->d->dev newTextureWithDescriptor: desc];
    [desc release];

    // newTextureWithDescriptor: returns nil on failure; do not report success
    // or register a resource that has no texture behind it.
    if (!d->tex) {
        qWarning("QRhiMetal: Failed to create texture for renderbuffer");
        return false;
    }

    if (!m_objectName.isEmpty())
        d->tex.label = [NSString stringWithUTF8String: m_objectName.constData()];

    lastActiveFrameSlot = -1;
    generation += 1;
    rhiD->registerResource(this);
    return true;
}
3796
// Reports the format hint when one was given. Without a hint, color
// renderbuffers report RGBA8 and every other type reports UnknownFormat.
QRhiTexture::Format QMetalRenderBuffer::backingFormat() const
{
    const bool hasHint = m_backingFormatHint != QRhiTexture::UnknownFormat;
    if (hasHint)
        return m_backingFormatHint;

    if (m_type == Color)
        return QRhiTexture::RGBA8;

    return QRhiTexture::UnknownFormat;
}
3804
// Forwards the settings to QRhiTexture, allocates the private data and
// starts out with no staging buffers and no per-level views.
QMetalTexture::QMetalTexture(QRhiImplementation *rhi,
                             Format format,
                             const QSize &pixelSize,
                             int depth,
                             int arraySize,
                             int sampleCount,
                             Flags flags)
    : QRhiTexture(rhi, format, pixelSize, depth, arraySize, sampleCount, flags),
      d(new QMetalTextureData(this))
{
    for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot)
        d->stagingBuf[slot] = nil;

    for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level)
        d->perLevelViews[level] = nil;
}
3816
// Hands any live Metal objects over via destroy(), then frees the private
// data.
QMetalTexture::~QMetalTexture()
{
    destroy();
    delete d;
}
3822
// Detaches the texture, the staging buffers and the per-level views from
// this object and, when the QRhi is still around, queues them on the
// backend's release queue and unregisters the resource. The texture itself
// is only queued when it is owned (d->owns), i.e. not for textures adopted
// via createFrom(). Does nothing when there is no texture.
void QMetalTexture::destroy()
{
    if (!d->tex)
        return;

    QRhiMetalData::DeferredReleaseEntry entry;
    entry.type = QRhiMetalData::DeferredReleaseEntry::Texture;
    entry.lastActiveFrameSlot = lastActiveFrameSlot;

    if (d->owns)
        entry.texture.texture = d->tex;
    else
        entry.texture.texture = nil;
    d->tex = nil;

    for (int slot = 0; slot < QMTL_FRAMES_IN_FLIGHT; ++slot) {
        entry.texture.stagingBuffers[slot] = d->stagingBuf[slot];
        d->stagingBuf[slot] = nil;
    }

    for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
        entry.texture.views[level] = d->perLevelViews[level];
        d->perLevelViews[level] = nil;
    }

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(entry);
    rhiD->unregisterResource(this);
}
3851
// Common first stage of create() and createFrom(): releases any existing
// texture, derives the Metal pixel format, mip level count and effective
// sample count, and validates the flag/size combination.
//
// Returns false, after printing a warning, for an invalid combination. On
// success the size to use (at least 1x1, height forced to 1 for 1D
// textures) is written to \a adjustedSize when that is non-null.
bool QMetalTexture::prepareCreate(QSize *adjustedSize)
{
    if (d->tex)
        destroy();

    const bool cube = m_flags.testFlag(CubeMap);
    const bool volume = m_flags.testFlag(ThreeDimensional);
    const bool array = m_flags.testFlag(TextureArray);
    const bool mipmapped = m_flags.testFlag(MipMapped);
    const bool oneDim = m_flags.testFlag(OneDimensional);

    QSize size;
    if (oneDim)
        size = QSize(qMax(1, m_pixelSize.width()), 1);
    else if (m_pixelSize.isEmpty())
        size = QSize(1, 1);
    else
        size = m_pixelSize;

    QRHI_RES_RHI(QRhiMetal);
    d->format = toMetalTextureFormat(m_format, m_flags, rhiD);
    mipLevelCount = mipmapped ? rhiD->q->mipLevelsForSize(size) : 1;
    samples = rhiD->effectiveSampleCount(m_sampleCount);

    // restrictions that only apply to multisample textures
    const bool multisample = samples > 1;
    if (multisample && cube) {
        qWarning("Cubemap texture cannot be multisample");
        return false;
    }
    if (multisample && volume) {
        qWarning("3D texture cannot be multisample");
        return false;
    }
    if (multisample && mipmapped) {
        qWarning("Multisample texture cannot have mipmaps");
        return false;
    }

    // mutually exclusive texture kinds
    if (cube && volume) {
        qWarning("Texture cannot be both cube and 3D");
        return false;
    }
    if (array && volume) {
        qWarning("Texture cannot be both array and 3D");
        return false;
    }
    if (oneDim && volume) {
        qWarning("Texture cannot be both 1D and 3D");
        return false;
    }
    if (oneDim && cube) {
        qWarning("Texture cannot be both 1D and cube");
        return false;
    }

    // depth and array size must agree with the flags
    if (!volume && m_depth > 1) {
        qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth);
        return false;
    }
    if (!array && m_arraySize > 0) {
        qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize);
        return false;
    }
    if (array && m_arraySize < 1) {
        qWarning("Texture is an array but array size is %d", m_arraySize);
        return false;
    }

    if (adjustedSize)
        *adjustedSize = size;

    return true;
}
3918
// Creates a new, owned MTLTexture according to the current settings.
//
// Returns false when prepareCreate() rejects the settings, when the texture
// format maps to no usable Metal pixel format on this platform, or when the
// device fails to create the texture. On success the generation counter is
// bumped and the resource is registered with the QRhi.
bool QMetalTexture::create()
{
    QSize size;
    if (!prepareCreate(&size))
        return false;

    // toMetalTextureFormat() has already warned about the unsupported format;
    // a descriptor with an invalid pixel format must not reach the device.
    if (d->format == MTLPixelFormatInvalid)
        return false;

    MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];

    const bool isCube = m_flags.testFlag(CubeMap);
    const bool is3D = m_flags.testFlag(ThreeDimensional);
    const bool isArray = m_flags.testFlag(TextureArray);
    const bool is1D = m_flags.testFlag(OneDimensional);
    if (isCube) {
        desc.textureType = MTLTextureTypeCube;
    } else if (is3D) {
        desc.textureType = MTLTextureType3D;
    } else if (is1D) {
        desc.textureType = isArray ? MTLTextureType1DArray : MTLTextureType1D;
    } else if (isArray) {
        desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
    } else {
        desc.textureType = samples > 1 ? MTLTextureType2DMultisample : MTLTextureType2D;
    }
    desc.pixelFormat = d->format;
    desc.width = NSUInteger(size.width());
    desc.height = NSUInteger(size.height());
    desc.depth = is3D ? qMax(1, m_depth) : 1;
    desc.mipmapLevelCount = NSUInteger(mipLevelCount);
    if (samples > 1)
        desc.sampleCount = NSUInteger(samples);
    if (isArray)
        desc.arrayLength = NSUInteger(qMax(0, m_arraySize));
    desc.resourceOptions = MTLResourceStorageModePrivate;
    desc.storageMode = MTLStorageModePrivate;
    desc.usage = MTLTextureUsageShaderRead;
    if (m_flags.testFlag(RenderTarget))
        desc.usage |= MTLTextureUsageRenderTarget;
    if (m_flags.testFlag(UsedWithLoadStore))
        desc.usage |= MTLTextureUsageShaderWrite;

    QRHI_RES_RHI(QRhiMetal);
    d->tex = [rhiD->d->dev newTextureWithDescriptor: desc];
    [desc release];

    // newTextureWithDescriptor: returns nil on failure; do not report success
    // or register a resource that has no texture behind it.
    if (!d->tex) {
        qWarning("QRhiMetal: Failed to create texture");
        return false;
    }

    if (!m_objectName.isEmpty())
        d->tex.label = [NSString stringWithUTF8String: m_objectName.constData()];

    // created here, so destroy() is responsible for queuing it for release
    d->owns = true;

    lastActiveFrameSlot = -1;
    generation += 1;
    rhiD->registerResource(this);
    return true;
}
3973
// Adopts an existing MTLTexture passed in via \a src.object instead of
// creating a new one. The texture is marked as not owned, so destroy() does
// not queue it for release. Returns false for a null native object or when
// prepareCreate() rejects the current settings.
bool QMetalTexture::createFrom(QRhiTexture::NativeTexture src)
{
    id<MTLTexture> nativeTex = id<MTLTexture>(src.object);
    if (nativeTex == 0)
        return false;

    if (!prepareCreate())
        return false;

    d->tex = nativeTex;
    d->owns = false;

    lastActiveFrameSlot = -1;
    generation += 1;

    QRHI_RES_RHI(QRhiMetal);
    rhiD->registerResource(this);
    return true;
}
3993
// Exposes the MTLTexture as an integer handle; the second member of the
// returned struct is always 0 with this backend.
QRhiTexture::NativeTexture QMetalTexture::nativeTexture()
{
    const quint64 handle = quint64(d->tex);
    return { handle, 0 };
}
3998
// Returns a texture view restricted to mip level \a level, creating and
// caching it in perLevelViews on first use. The view keeps the texture's
// type and pixel format and covers 6 slices for cubemaps, the array size for
// texture arrays, and a single slice otherwise.
id<MTLTexture> QMetalTextureData::viewForLevel(int level)
{
    Q_ASSERT(level >= 0 && level < int(q->mipLevelCount));

    id<MTLTexture> cached = perLevelViews[level];
    if (cached)
        return cached;

    const MTLTextureType type = [tex textureType];
    const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap);
    const bool isArray = q->m_flags.testFlag(QRhiTexture::TextureArray);

    const NSRange levelRange = NSMakeRange(NSUInteger(level), 1);
    const NSRange sliceRange = NSMakeRange(0, isCube ? 6 : (isArray ? qMax(0, q->m_arraySize) : 1));

    id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format
                                                 textureType: type
                                                      levels: levelRange
                                                      slices: sliceRange];
    perLevelViews[level] = view;
    return view;
}
4015
// Forwards the filter and address mode settings to QRhiSampler and allocates
// the private data. The sampler state itself is created in create().
QMetalSampler::QMetalSampler(QRhiImplementation *rhi,
                             Filter magFilter,
                             Filter minFilter,
                             Filter mipmapMode,
                             AddressMode u,
                             AddressMode v,
                             AddressMode w)
    : QRhiSampler(rhi, magFilter, minFilter, mipmapMode, u, v, w),
      d(new QMetalSamplerData)
{
}
4022
// Hands any live sampler state over via destroy(), then frees the private
// data.
QMetalSampler::~QMetalSampler()
{
    destroy();
    delete d;
}
4028
// Detaches the sampler state from this object and, when the QRhi is still
// around, queues it on the backend's release queue and unregisters the
// resource. Does nothing when there is no sampler state.
void QMetalSampler::destroy()
{
    if (!d->samplerState)
        return;

    QRhiMetalData::DeferredReleaseEntry entry;
    entry.type = QRhiMetalData::DeferredReleaseEntry::Sampler;
    entry.lastActiveFrameSlot = lastActiveFrameSlot;
    entry.sampler.samplerState = d->samplerState;

    d->samplerState = nil;

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(entry);
    rhiD->unregisterResource(this);
}
4047
// Translates a min/mag filter. Only Nearest and Linear are valid here; any
// other value is treated as unreachable.
static inline MTLSamplerMinMagFilter toMetalFilter(QRhiSampler::Filter f)
{
    if (f == QRhiSampler::Nearest)
        return MTLSamplerMinMagFilterNearest;
    if (f == QRhiSampler::Linear)
        return MTLSamplerMinMagFilterLinear;

    Q_UNREACHABLE();
    return MTLSamplerMinMagFilterNearest;
}
4060
// Translates the mipmap mode; None maps to "not mipmapped". Any value other
// than None, Nearest and Linear is treated as unreachable.
static inline MTLSamplerMipFilter toMetalMipmapMode(QRhiSampler::Filter f)
{
    if (f == QRhiSampler::None)
        return MTLSamplerMipFilterNotMipmapped;
    if (f == QRhiSampler::Nearest)
        return MTLSamplerMipFilterNearest;
    if (f == QRhiSampler::Linear)
        return MTLSamplerMipFilterLinear;

    Q_UNREACHABLE();
    return MTLSamplerMipFilterNotMipmapped;
}
4075
// Translates a texture coordinate address mode. Any value other than
// Repeat, ClampToEdge and Mirror is treated as unreachable.
static inline MTLSamplerAddressMode toMetalAddressMode(QRhiSampler::AddressMode m)
{
    if (m == QRhiSampler::Repeat)
        return MTLSamplerAddressModeRepeat;
    if (m == QRhiSampler::ClampToEdge)
        return MTLSamplerAddressModeClampToEdge;
    if (m == QRhiSampler::Mirror)
        return MTLSamplerAddressModeMirrorRepeat;

    Q_UNREACHABLE();
    return MTLSamplerAddressModeClampToEdge;
}
4090
// Translates the sampler's texture comparison operator one-to-one to the
// matching MTLCompareFunction. Unknown values are treated as unreachable.
static inline MTLCompareFunction toMetalTextureCompareFunction(QRhiSampler::CompareOp op)
{
    switch (op) {
    case QRhiSampler::Never:          return MTLCompareFunctionNever;
    case QRhiSampler::Less:           return MTLCompareFunctionLess;
    case QRhiSampler::Equal:          return MTLCompareFunctionEqual;
    case QRhiSampler::LessOrEqual:    return MTLCompareFunctionLessEqual;
    case QRhiSampler::Greater:        return MTLCompareFunctionGreater;
    case QRhiSampler::NotEqual:       return MTLCompareFunctionNotEqual;
    case QRhiSampler::GreaterOrEqual: return MTLCompareFunctionGreaterEqual;
    case QRhiSampler::Always:         return MTLCompareFunctionAlways;
    default:
        break;
    }

    Q_UNREACHABLE();
    return MTLCompareFunctionNever;
}
4115
// (Re)creates the MTLSamplerState from the current filter, address mode and
// compare settings, releasing any previous state first. Bumps the generation
// counter, registers the resource with the QRhi and returns true.
bool QMetalSampler::create()
{
    if (d->samplerState)
        destroy();

    MTLSamplerDescriptor *samplerDesc = [[MTLSamplerDescriptor alloc] init];

    // filtering
    samplerDesc.minFilter = toMetalFilter(m_minFilter);
    samplerDesc.magFilter = toMetalFilter(m_magFilter);
    samplerDesc.mipFilter = toMetalMipmapMode(m_mipmapMode);

    // addressing: U, V, W map to Metal's s, t, r
    samplerDesc.sAddressMode = toMetalAddressMode(m_addressU);
    samplerDesc.tAddressMode = toMetalAddressMode(m_addressV);
    samplerDesc.rAddressMode = toMetalAddressMode(m_addressW);

    samplerDesc.compareFunction = toMetalTextureCompareFunction(m_compareOp);

    QRHI_RES_RHI(QRhiMetal);
    d->samplerState = [rhiD->d->dev newSamplerStateWithDescriptor: samplerDesc];
    [samplerDesc release];

    lastActiveFrameSlot = -1;
    generation += 1;
    rhiD->registerResource(this);
    return true;
}
4139
// Allocates the private data; the native rate map is supplied later via
// createFrom().
QMetalShadingRateMap::QMetalShadingRateMap(QRhiImplementation *rhi)
    : QRhiShadingRateMap(rhi), d(new QMetalShadingRateMapData)
{
}
4145
// Hands any retained rate map over via destroy(), then frees the private
// data.
QMetalShadingRateMap::~QMetalShadingRateMap()
{
    destroy();
    delete d;
}
4151
// Detaches the rate map from this object and, when the QRhi is still
// around, queues it on the backend's release queue and unregisters the
// resource. Does nothing when there is no rate map.
void QMetalShadingRateMap::destroy()
{
    if (!d->rateMap)
        return;

    QRhiMetalData::DeferredReleaseEntry entry;
    entry.type = QRhiMetalData::DeferredReleaseEntry::ShadingRateMap;
    entry.lastActiveFrameSlot = lastActiveFrameSlot;
    entry.shadingRateMap.rateMap = d->rateMap;

    d->rateMap = nil;

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(entry);
    rhiD->unregisterResource(this);
}
4170
// Adopts the MTLRasterizationRateMap passed in via \a src.object, releasing
// any previously held map first. The adopted object is retained here.
// Returns false when the native object is null.
bool QMetalShadingRateMap::createFrom(NativeShadingRateMap src)
{
    if (d->rateMap)
        destroy();

    id<MTLRasterizationRateMap> nativeMap = (id<MTLRasterizationRateMap>) (quintptr(src.object));
    d->rateMap = nativeMap;
    if (!nativeMap)
        return false;

    [nativeMap retain];

    lastActiveFrameSlot = -1;
    generation += 1;

    QRHI_RES_RHI(QRhiMetal);
    rhiD->registerResource(this);
    return true;
}
4188
// This class is a mere placeholder: Metal has no Vulkan-style
// RenderPass+Framebuffer concept. There is MTLRenderPassDescriptor of
// course, but that gets created on the fly for each pass.
//
// The constructor only reserves room for the serialized format data.
QMetalRenderPassDescriptor::QMetalRenderPassDescriptor(QRhiImplementation *rhi)
    : QRhiRenderPassDescriptor(rhi)
{
    serializedFormatData.reserve(16);
}
4196
// No native objects are held; destroy() only unregisters the resource.
QMetalRenderPassDescriptor::~QMetalRenderPassDescriptor()
{
    destroy();
}
4201
// Unregisters this object from the QRhi, if that is still around. There is
// nothing else to release.
void QMetalRenderPassDescriptor::destroy()
{
    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->unregisterResource(this);
}
4208
// Two descriptors are compatible when they agree on the number of color
// attachments, on the presence of a depth-stencil attachment and of a
// shading rate map, on every color format, and - when a depth-stencil
// attachment is present - on its format. A null \a other is never compatible.
bool QMetalRenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const
{
    if (!other)
        return false;

    const QMetalRenderPassDescriptor *rhs = QRHI_RES(const QMetalRenderPassDescriptor, other);

    const bool sameLayout = colorAttachmentCount == rhs->colorAttachmentCount
            && hasDepthStencil == rhs->hasDepthStencil
            && hasShadingRateMap == rhs->hasShadingRateMap;
    if (!sameLayout)
        return false;

    for (int att = 0; att < colorAttachmentCount; ++att) {
        if (colorFormat[att] != rhs->colorFormat[att])
            return false;
    }

    // the depth-stencil format only matters when there is such an attachment
    return !hasDepthStencil || dsFormat == rhs->dsFormat;
}
4237
// Rebuilds serializedFormatData from the current state. Layout, in order:
// color attachment count, depth-stencil presence, one entry per color
// format, the depth-stencil format (0 when absent), shading rate map
// presence.
void QMetalRenderPassDescriptor::updateSerializedFormat()
{
    serializedFormatData.clear();
    const auto put = [this](quint32 value) { serializedFormatData.push_back(value); };

    put(colorAttachmentCount);
    put(hasDepthStencil);
    for (int att = 0; att < colorAttachmentCount; ++att)
        put(colorFormat[att]);
    put(hasDepthStencil ? dsFormat : 0);
    put(hasShadingRateMap);
}
4250
// Creates a new descriptor carrying the same attachment layout and formats
// as this one, with its serialized format data already up to date, and
// registers it with the QRhi. The caller receives the new object.
QRhiRenderPassDescriptor *QMetalRenderPassDescriptor::newCompatibleRenderPassDescriptor() const
{
    QMetalRenderPassDescriptor *clone = new QMetalRenderPassDescriptor(m_rhi);

    // color attachments
    clone->colorAttachmentCount = colorAttachmentCount;
    memcpy(clone->colorFormat, colorFormat, sizeof(colorFormat));

    // depth-stencil and shading rate map
    clone->hasDepthStencil = hasDepthStencil;
    clone->dsFormat = dsFormat;
    clone->hasShadingRateMap = hasShadingRateMap;

    clone->updateSerializedFormat();

    QRHI_RES_RHI(QRhiMetal);
    rhiD->registerResource(clone, false);
    return clone;
}
4266
// Returns the data last built by updateSerializedFormat().
QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const
{
    return serializedFormatData;
}
4271
// Allocates the render target data; nothing here fills it in.
QMetalSwapChainRenderTarget::QMetalSwapChainRenderTarget(QRhiImplementation *rhi,
                                                         QRhiSwapChain *swapchain)
    : QRhiSwapChainRenderTarget(rhi, swapchain), d(new QMetalRenderTargetData)
{
}
4277
// Calls destroy() (a no-op for this class) and frees the render target data.
QMetalSwapChainRenderTarget::~QMetalSwapChainRenderTarget()
{
    destroy();
    delete d;
}
4283
// Intentionally empty: this object releases nothing in destroy().
void QMetalSwapChainRenderTarget::destroy()
{
    // nothing to do here
}
4288
// Returns the pixel size stored in the render target data.
QSize QMetalSwapChainRenderTarget::pixelSize() const
{
    return d->pixelSize;
}
4293
// Returns the device pixel ratio stored in the render target data.
float QMetalSwapChainRenderTarget::devicePixelRatio() const
{
    return d->dpr;
}
4298
// Returns the sample count stored in the render target data.
int QMetalSwapChainRenderTarget::sampleCount() const
{
    return d->sampleCount;
}
4303
// Stores the description and flags in the base class and allocates the
// render target data; create() fills it in.
QMetalTextureRenderTarget::QMetalTextureRenderTarget(
        QRhiImplementation *rhi,
        const QRhiTextureRenderTargetDescription &desc,
        Flags flags)
    : QRhiTextureRenderTarget(rhi, desc, flags), d(new QMetalRenderTargetData)
{
}
4311
// Unregisters via destroy() and frees the render target data.
QMetalTextureRenderTarget::~QMetalTextureRenderTarget()
{
    destroy();
    delete d;
}
4317
// Unregisters this object from the QRhi, if that is still around. The
// attached textures and renderbuffers are not touched here.
void QMetalTextureRenderTarget::destroy()
{
    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->unregisterResource(this);
}
4324
// Builds a render pass descriptor that matches the current attachment
// description: one color format per color attachment (taken from the
// texture or, failing that, the renderbuffer), the depth-stencil format when
// a depth texture or depth-stencil buffer is set, and whether a shading rate
// map is present. The new object is registered with the QRhi and returned.
//
// Reads the Metal formats stored in the attachments' private data, so the
// attachments are expected to have been created already.
QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDescriptor()
{
    QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);

    const int colorCount = int(m_desc.colorAttachmentCount());
    rpD->colorAttachmentCount = colorCount;
    for (int att = 0; att < colorCount; ++att) {
        const QRhiColorAttachment *colorAtt = m_desc.colorAttachmentAt(att);
        if (QMetalTexture *texD = QRHI_RES(QMetalTexture, colorAtt->texture()))
            rpD->colorFormat[att] = int(texD->d->format);
        else
            rpD->colorFormat[att] = int(QRHI_RES(QMetalRenderBuffer, colorAtt->renderBuffer())->d->format);
    }

    rpD->hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
    // a depth texture takes precedence over a depth-stencil buffer
    if (m_desc.depthTexture()) {
        QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture());
        rpD->dsFormat = int(depthTexD->d->format);
    } else if (m_desc.depthStencilBuffer()) {
        QMetalRenderBuffer *depthRbD = QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer());
        rpD->dsFormat = int(depthRbD->d->format);
    }

    rpD->hasShadingRateMap = m_desc.shadingRateMap() != nullptr;

    rpD->updateSerializedFormat();

    QRHI_RES_RHI(QRhiMetal);
    rhiD->registerResource(rpD, false);
    return rpD;
}
4352
// Fills the render target data from the current attachment description:
// the color attachments (texture or renderbuffer, layer/slice, level and
// resolve target), the depth-stencil attachment with its store/preserve
// settings and optional resolve texture, and the pixel size and sample count
// taken from the first color attachment or, without color attachments, from
// the depth-stencil attachment. Always returns true.
//
// This may run more than once on the same object (pixelSize() calls it when
// the attachments are out of date), so state that is only conditionally
// assigned is reset first.
bool QMetalTextureRenderTarget::create()
{
    QRHI_RES_RHI(QRhiMetal);
    Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture());
    Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture());
    const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();

    d->colorAttCount = 0;
    int attIndex = 0;
    for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it, ++attIndex) {
        d->colorAttCount += 1;
        QMetalTexture *texD = QRHI_RES(QMetalTexture, it->texture());
        QMetalRenderBuffer *rbD = QRHI_RES(QMetalRenderBuffer, it->renderBuffer());
        Q_ASSERT(texD || rbD);
        id<MTLTexture> dst = nil;
        bool is3D = false;
        if (texD) {
            dst = texD->d->tex;
            // the first color attachment defines size and sample count
            if (attIndex == 0) {
                d->pixelSize = rhiD->q->sizeForMipLevel(it->level(), texD->pixelSize());
                d->sampleCount = texD->samples;
            }
            is3D = texD->flags().testFlag(QRhiTexture::ThreeDimensional);
        } else if (rbD) {
            dst = rbD->d->tex;
            if (attIndex == 0) {
                d->pixelSize = rbD->pixelSize();
                d->sampleCount = rbD->samples;
            }
        }
        QMetalRenderTargetData::ColorAtt colorAtt;
        colorAtt.tex = dst;
        // for 3D textures the layer selects a depth slice, not an array layer
        colorAtt.arrayLayer = is3D ? 0 : it->layer();
        colorAtt.slice = is3D ? it->layer() : 0;
        colorAtt.level = it->level();
        QMetalTexture *resTexD = QRHI_RES(QMetalTexture, it->resolveTexture());
        colorAtt.resolveTex = resTexD ? resTexD->d->tex : nil;
        colorAtt.resolveLayer = it->resolveLayer();
        colorAtt.resolveLevel = it->resolveLevel();
        d->fb.colorAtt[attIndex] = colorAtt;
    }
    d->dpr = 1;

    // The depth resolve texture is only assigned below when the description
    // has one. Clear it first so that a previous create() with a resolve
    // texture does not leave a stale target behind.
    d->fb.dsResolveTex = nil;

    if (hasDepthStencil) {
        if (m_desc.depthTexture()) {
            QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture());
            d->fb.dsTex = depthTexD->d->tex;
            d->fb.hasStencil = rhiD->isStencilSupportingFormat(depthTexD->format());
            d->fb.depthNeedsStore = !m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture();
            d->fb.preserveDs = m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents);
            if (d->colorAttCount == 0) {
                d->pixelSize = depthTexD->pixelSize();
                d->sampleCount = depthTexD->samples;
            }
        } else {
            QMetalRenderBuffer *depthRbD = QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer());
            d->fb.dsTex = depthRbD->d->tex;
            d->fb.hasStencil = true;
            d->fb.depthNeedsStore = false;
            d->fb.preserveDs = false;
            if (d->colorAttCount == 0) {
                d->pixelSize = depthRbD->pixelSize();
                d->sampleCount = depthRbD->samples;
            }
        }
        if (m_desc.depthResolveTexture()) {
            QMetalTexture *depthResolveTexD = QRHI_RES(QMetalTexture, m_desc.depthResolveTexture());
            d->fb.dsResolveTex = depthResolveTexD->d->tex;
        }
        d->dsAttCount = 1;
    } else {
        d->dsAttCount = 0;
    }

    if (d->colorAttCount > 0)
        d->fb.preserveColor = m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents);

    // remember which attachment resources this state was built from
    QRhiRenderTargetAttachmentTracker::updateResIdList<QMetalTexture, QMetalRenderBuffer>(m_desc, &d->currentResIdList);

    rhiD->registerResource(this, false);
    return true;
}
4435
// Returns the pixel size stored in the render target data. When the
// attachment tracker reports that d->currentResIdList no longer matches
// m_desc, create() is run again first.
QSize QMetalTextureRenderTarget::pixelSize() const
{
    const bool attachmentsUpToDate =
            QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(m_desc, d->currentResIdList);
    if (!attachmentsUpToDate) {
        // create() is non-const, hence the cast
        QMetalTextureRenderTarget *self = const_cast<QMetalTextureRenderTarget *>(this);
        self->create();
    }

    return d->pixelSize;
}
4443
// Returns the device pixel ratio stored in the render target data.
float QMetalTextureRenderTarget::devicePixelRatio() const
{
    const float ratio = d->dpr;
    return ratio;
}
4448
// Returns the sample count stored in the render target data.
int QMetalTextureRenderTarget::sampleCount() const
{
    const int samples = d->sampleCount;
    return samples;
}
4453
// Constructs the object; all the work is done by the base class constructor.
QMetalShaderResourceBindings::QMetalShaderResourceBindings(QRhiImplementation *rhi) : QRhiShaderResourceBindings(rhi)
{
    // nothing to set up here
}
4458
// Runs destroy() so that the bookkeeping is reset and the object gets
// unregistered from the QRhi (when there still is one).
QMetalShaderResourceBindings::~QMetalShaderResourceBindings()
{
    this->destroy();
}
4463
// Resets the sorted binding list and maxBinding, then unregisters the object
// from the QRhiMetal instance, if there is one.
void QMetalShaderResourceBindings::destroy()
{
    maxBinding = -1;
    sortedBindings.clear();

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->unregisterResource(this);
}
4473
// (Re)builds the sorted binding list and the per-binding tracking data.
//
// Returns false when sanityCheckShaderResourceBindings() rejects the binding
// list, true otherwise. On success the generation counter is incremented and
// the object is registered with the QRhiMetal instance.
bool QMetalShaderResourceBindings::create()
{
    // a non-empty sorted list means create() has already run: start over
    if (!sortedBindings.isEmpty())
        destroy();

    QRHI_RES_RHI(QRhiMetal);
    const bool bindingsAreSane = rhiD->sanityCheckShaderResourceBindings(this);
    if (!bindingsAreSane)
        return false;

    rhiD->updateLayoutDesc(this);

    for (auto srcIt = m_bindings.cbegin(), srcEnd = m_bindings.cend(); srcIt != srcEnd; ++srcIt)
        sortedBindings.push_back(*srcIt);
    std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);

    // after sorting, the last entry carries the highest binding number
    maxBinding = sortedBindings.isEmpty()
            ? -1
            : QRhiImplementation::shaderResourceBindingData(sortedBindings.last())->binding;

    boundResourceData.resize(sortedBindings.count());

    for (auto bdIt = boundResourceData.begin(), bdEnd = boundResourceData.end(); bdIt != bdEnd; ++bdIt)
        memset(&(*bdIt), 0, sizeof(BoundResourceData));

    ++generation;
    rhiD->registerResource(this, false);
    return true;
}
4501
// Rebuilds sortedBindings from m_bindings, zeroes the per-binding tracking
// data and increments the generation counter. Sorting is skipped when the
// caller passes BindingsAreSorted.
void QMetalShaderResourceBindings::updateResources(UpdateFlags flags)
{
    sortedBindings.clear();
    for (auto srcIt = m_bindings.cbegin(), srcEnd = m_bindings.cend(); srcIt != srcEnd; ++srcIt)
        sortedBindings.push_back(*srcIt);

    const bool alreadySorted = flags.testFlag(BindingsAreSorted);
    if (!alreadySorted)
        std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);

    for (auto bdIt = boundResourceData.begin(), bdEnd = boundResourceData.end(); bdIt != bdEnd; ++bdIt)
        memset(&(*bdIt), 0, sizeof(BoundResourceData));

    ++generation;
}
4514
// Allocates the private data and wires up its back pointers: d->q refers to
// this pipeline and d->tess.q refers to the private data itself.
QMetalGraphicsPipeline::QMetalGraphicsPipeline(QRhiImplementation *rhi)
    : QRhiGraphicsPipeline(rhi), d(new QMetalGraphicsPipelineData)
{
    d->tess.q = d;
    d->q = this;
}
4522
// Runs destroy() first and only then deletes the private data, since
// destroy() operates on d.
QMetalGraphicsPipeline::~QMetalGraphicsPipeline()
{
    this->destroy();
    delete d;
}
4528
// Drops everything the pipeline holds: the shader data, the extra work
// buffers and the buffer size buffer are destroyed right here, while the
// native pipeline/depth-stencil/compute state objects are handed over in a
// DeferredReleaseEntry that is appended to the QRhiMetal release queue.
void QMetalGraphicsPipeline::destroy()
{
    d->vs.destroy();
    d->fs.destroy();

    for (int variantIdx = 0; variantIdx < 3; ++variantIdx)
        d->tess.compVs[variantIdx].destroy();

    d->tess.compTesc.destroy();
    d->tess.vertTese.destroy();

    auto &workBufMgr = d->extraBufMgr;
    qDeleteAll(workBufMgr.deviceLocalWorkBuffers);
    workBufMgr.deviceLocalWorkBuffers.clear();
    qDeleteAll(workBufMgr.hostVisibleWorkBuffers);
    workBufMgr.hostVisibleWorkBuffers.clear();

    delete d->bufferSizeBuffer;
    d->bufferSizeBuffer = nullptr;

    // with no native state object present there is nothing left to do
    const bool hasNativeState = d->ps || d->ds
            || d->tess.vertexComputeState[0] || d->tess.vertexComputeState[1] || d->tess.vertexComputeState[2]
            || d->tess.tessControlComputeState;
    if (!hasNativeState)
        return;

    QRhiMetalData::DeferredReleaseEntry entry;
    entry.type = QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline;
    entry.lastActiveFrameSlot = lastActiveFrameSlot;

    entry.graphicsPipeline.pipelineState = d->ps;
    d->ps = nil;

    entry.graphicsPipeline.depthStencilState = d->ds;
    d->ds = nil;

    entry.graphicsPipeline.tessVertexComputeState = d->tess.vertexComputeState;
    d->tess.vertexComputeState = {};

    entry.graphicsPipeline.tessTessControlComputeState = d->tess.tessControlComputeState;
    d->tess.tessControlComputeState = nil;

    QRHI_RES_RHI(QRhiMetal);
    if (!rhiD)
        return;

    rhiD->d->releaseQueue.append(entry);
    rhiD->unregisterResource(this);
}
4574
// Translates a QRhi vertex input attribute format to the matching
// MTLVertexFormat. Any value not listed is treated as unreachable.
static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::Format format)
{
    using Attr = QRhiVertexInputAttribute;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLVertexFormat mtlFormat = MTLVertexFormatFloat4;
    switch (format) {
    case Attr::Float4:     mtlFormat = MTLVertexFormatFloat4; break;
    case Attr::Float3:     mtlFormat = MTLVertexFormatFloat3; break;
    case Attr::Float2:     mtlFormat = MTLVertexFormatFloat2; break;
    case Attr::Float:      mtlFormat = MTLVertexFormatFloat; break;
    case Attr::UNormByte4: mtlFormat = MTLVertexFormatUChar4Normalized; break;
    case Attr::UNormByte2: mtlFormat = MTLVertexFormatUChar2Normalized; break;
    case Attr::UNormByte:  mtlFormat = MTLVertexFormatUCharNormalized; break;
    case Attr::UInt4:      mtlFormat = MTLVertexFormatUInt4; break;
    case Attr::UInt3:      mtlFormat = MTLVertexFormatUInt3; break;
    case Attr::UInt2:      mtlFormat = MTLVertexFormatUInt2; break;
    case Attr::UInt:       mtlFormat = MTLVertexFormatUInt; break;
    case Attr::SInt4:      mtlFormat = MTLVertexFormatInt4; break;
    case Attr::SInt3:      mtlFormat = MTLVertexFormatInt3; break;
    case Attr::SInt2:      mtlFormat = MTLVertexFormatInt2; break;
    case Attr::SInt:       mtlFormat = MTLVertexFormatInt; break;
    case Attr::Half4:      mtlFormat = MTLVertexFormatHalf4; break;
    case Attr::Half3:      mtlFormat = MTLVertexFormatHalf3; break;
    case Attr::Half2:      mtlFormat = MTLVertexFormatHalf2; break;
    case Attr::Half:       mtlFormat = MTLVertexFormatHalf; break;
    case Attr::UShort4:    mtlFormat = MTLVertexFormatUShort4; break;
    case Attr::UShort3:    mtlFormat = MTLVertexFormatUShort3; break;
    case Attr::UShort2:    mtlFormat = MTLVertexFormatUShort2; break;
    case Attr::UShort:     mtlFormat = MTLVertexFormatUShort; break;
    case Attr::SShort4:    mtlFormat = MTLVertexFormatShort4; break;
    case Attr::SShort3:    mtlFormat = MTLVertexFormatShort3; break;
    case Attr::SShort2:    mtlFormat = MTLVertexFormatShort2; break;
    case Attr::SShort:     mtlFormat = MTLVertexFormatShort; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlFormat;
}
4637
// Translates a QRhi blend factor to the matching MTLBlendFactor. Any value
// not listed is treated as unreachable.
static inline MTLBlendFactor toMetalBlendFactor(QRhiGraphicsPipeline::BlendFactor f)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLBlendFactor mtlFactor = MTLBlendFactorZero;
    switch (f) {
    case GP::Zero:                  mtlFactor = MTLBlendFactorZero; break;
    case GP::One:                   mtlFactor = MTLBlendFactorOne; break;
    case GP::SrcColor:              mtlFactor = MTLBlendFactorSourceColor; break;
    case GP::OneMinusSrcColor:      mtlFactor = MTLBlendFactorOneMinusSourceColor; break;
    case GP::DstColor:              mtlFactor = MTLBlendFactorDestinationColor; break;
    case GP::OneMinusDstColor:      mtlFactor = MTLBlendFactorOneMinusDestinationColor; break;
    case GP::SrcAlpha:              mtlFactor = MTLBlendFactorSourceAlpha; break;
    case GP::OneMinusSrcAlpha:      mtlFactor = MTLBlendFactorOneMinusSourceAlpha; break;
    case GP::DstAlpha:              mtlFactor = MTLBlendFactorDestinationAlpha; break;
    case GP::OneMinusDstAlpha:      mtlFactor = MTLBlendFactorOneMinusDestinationAlpha; break;
    case GP::ConstantColor:         mtlFactor = MTLBlendFactorBlendColor; break;
    case GP::ConstantAlpha:         mtlFactor = MTLBlendFactorBlendAlpha; break;
    case GP::OneMinusConstantColor: mtlFactor = MTLBlendFactorOneMinusBlendColor; break;
    case GP::OneMinusConstantAlpha: mtlFactor = MTLBlendFactorOneMinusBlendAlpha; break;
    case GP::SrcAlphaSaturate:      mtlFactor = MTLBlendFactorSourceAlphaSaturated; break;
    case GP::Src1Color:             mtlFactor = MTLBlendFactorSource1Color; break;
    case GP::OneMinusSrc1Color:     mtlFactor = MTLBlendFactorOneMinusSource1Color; break;
    case GP::Src1Alpha:             mtlFactor = MTLBlendFactorSource1Alpha; break;
    case GP::OneMinusSrc1Alpha:     mtlFactor = MTLBlendFactorOneMinusSource1Alpha; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlFactor;
}
4684
// Translates a QRhi blend operation to the matching MTLBlendOperation. Any
// value not listed is treated as unreachable.
static inline MTLBlendOperation toMetalBlendOp(QRhiGraphicsPipeline::BlendOp op)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLBlendOperation mtlOp = MTLBlendOperationAdd;
    switch (op) {
    case GP::Add:             mtlOp = MTLBlendOperationAdd; break;
    case GP::Subtract:        mtlOp = MTLBlendOperationSubtract; break;
    case GP::ReverseSubtract: mtlOp = MTLBlendOperationReverseSubtract; break;
    case GP::Min:             mtlOp = MTLBlendOperationMin; break;
    case GP::Max:             mtlOp = MTLBlendOperationMax; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlOp;
}
4703
// Builds the Metal color write mask bits from the QRhi color mask flags, one
// MTLColorWriteMask bit per enabled R/G/B/A channel.
static inline uint toMetalColorWriteMask(QRhiGraphicsPipeline::ColorMask c)
{
    using GP = QRhiGraphicsPipeline;

    uint mask = 0;
    mask |= c.testFlag(GP::R) ? uint(MTLColorWriteMaskRed) : 0u;
    mask |= c.testFlag(GP::G) ? uint(MTLColorWriteMaskGreen) : 0u;
    mask |= c.testFlag(GP::B) ? uint(MTLColorWriteMaskBlue) : 0u;
    mask |= c.testFlag(GP::A) ? uint(MTLColorWriteMaskAlpha) : 0u;
    return mask;
}
4717
// Translates a QRhi comparison operator to the matching MTLCompareFunction.
// Any value not listed is treated as unreachable.
static inline MTLCompareFunction toMetalCompareOp(QRhiGraphicsPipeline::CompareOp op)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLCompareFunction mtlFunc = MTLCompareFunctionAlways;
    switch (op) {
    case GP::Never:          mtlFunc = MTLCompareFunctionNever; break;
    case GP::Less:           mtlFunc = MTLCompareFunctionLess; break;
    case GP::Equal:          mtlFunc = MTLCompareFunctionEqual; break;
    case GP::LessOrEqual:    mtlFunc = MTLCompareFunctionLessEqual; break;
    case GP::Greater:        mtlFunc = MTLCompareFunctionGreater; break;
    case GP::NotEqual:       mtlFunc = MTLCompareFunctionNotEqual; break;
    case GP::GreaterOrEqual: mtlFunc = MTLCompareFunctionGreaterEqual; break;
    case GP::Always:         mtlFunc = MTLCompareFunctionAlways; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlFunc;
}
4742
// Translates a QRhi stencil operation to the matching MTLStencilOperation.
// Any value not listed is treated as unreachable.
static inline MTLStencilOperation toMetalStencilOp(QRhiGraphicsPipeline::StencilOp op)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLStencilOperation mtlOp = MTLStencilOperationKeep;
    switch (op) {
    case GP::StencilZero:       mtlOp = MTLStencilOperationZero; break;
    case GP::Keep:              mtlOp = MTLStencilOperationKeep; break;
    case GP::Replace:           mtlOp = MTLStencilOperationReplace; break;
    case GP::IncrementAndClamp: mtlOp = MTLStencilOperationIncrementClamp; break;
    case GP::DecrementAndClamp: mtlOp = MTLStencilOperationDecrementClamp; break;
    case GP::Invert:            mtlOp = MTLStencilOperationInvert; break;
    case GP::IncrementAndWrap:  mtlOp = MTLStencilOperationIncrementWrap; break;
    case GP::DecrementAndWrap:  mtlOp = MTLStencilOperationDecrementWrap; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlOp;
}
4767
// Translates a QRhi topology to the matching MTLPrimitiveType. Only
// triangles, triangle strips, lines, line strips and points are mapped; any
// other topology value is treated as unreachable.
static inline MTLPrimitiveType toMetalPrimitiveType(QRhiGraphicsPipeline::Topology t)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLPrimitiveType mtlType = MTLPrimitiveTypeTriangle;
    switch (t) {
    case GP::Triangles:     mtlType = MTLPrimitiveTypeTriangle; break;
    case GP::TriangleStrip: mtlType = MTLPrimitiveTypeTriangleStrip; break;
    case GP::Lines:         mtlType = MTLPrimitiveTypeLine; break;
    case GP::LineStrip:     mtlType = MTLPrimitiveTypeLineStrip; break;
    case GP::Points:        mtlType = MTLPrimitiveTypePoint; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlType;
}
4786
// Translates a QRhi topology to its MTLPrimitiveTopologyClass: the triangle
// topologies (list, strip, fan) map to the triangle class, the line
// topologies to the line class, and points to the point class. Any other
// value is treated as unreachable.
static inline MTLPrimitiveTopologyClass toMetalPrimitiveTopologyClass(QRhiGraphicsPipeline::Topology t)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLPrimitiveTopologyClass topologyClass = MTLPrimitiveTopologyClassTriangle;
    switch (t) {
    case GP::Triangles:
    case GP::TriangleStrip:
    case GP::TriangleFan:
        topologyClass = MTLPrimitiveTopologyClassTriangle;
        break;
    case GP::Lines:
    case GP::LineStrip:
        topologyClass = MTLPrimitiveTopologyClassLine;
        break;
    case GP::Points:
        topologyClass = MTLPrimitiveTopologyClassPoint;
        break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return topologyClass;
}
4804
// Translates a QRhi cull mode to the matching MTLCullMode. Any value not
// listed is treated as unreachable.
static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLCullMode mtlMode = MTLCullModeNone;
    switch (c) {
    case GP::None:  mtlMode = MTLCullModeNone; break;
    case GP::Front: mtlMode = MTLCullModeFront; break;
    case GP::Back:  mtlMode = MTLCullModeBack; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return mtlMode;
}
4819
// Translates a QRhi polygon mode to the matching MTLTriangleFillMode. Any
// value not listed is treated as unreachable.
static inline MTLTriangleFillMode toMetalTriangleFillMode(QRhiGraphicsPipeline::PolygonMode mode)
{
    using GP = QRhiGraphicsPipeline;

    // initial value doubles as the result that follows Q_UNREACHABLE()
    MTLTriangleFillMode fillMode = MTLTriangleFillModeFill;
    switch (mode) {
    case GP::Fill: fillMode = MTLTriangleFillModeFill; break;
    case GP::Line: fillMode = MTLTriangleFillModeLines; break;
    default:
        Q_UNREACHABLE();
        break;
    }
    return fillMode;
}
4832
// Translates the tessellation winding order from the shader description to an
// MTLWinding. Everything other than the clockwise order yields
// counter-clockwise; that includes the Unknown value that results from a
// tessellation evaluation shader not declaring a winding order, so reaching
// the fallback is legitimate.
static inline MTLWinding toMetalTessellationWindingOrder(QShaderDescription::TessellationWindingOrder w)
{
    if (w == QShaderDescription::CwTessellationWindingOrder)
        return MTLWindingClockwise;

    return MTLWindingCounterClockwise;
}
4845
// Translates the tessellation partitioning from the shader description to an
// MTLTessellationPartitionMode. The two fractional modes map to their Metal
// counterparts; equal partitioning and every other value map to the Pow2
// mode. The fallback is legitimately reachable: a tessellation evaluation
// shader that does not declare a partitioning leaves the value as Unknown.
static inline MTLTessellationPartitionMode toMetalTessellationPartitionMode(QShaderDescription::TessellationPartitioning p)
{
    if (p == QShaderDescription::FractionalEvenTessellationPartitioning)
        return MTLTessellationPartitionModeFractionalEven;

    if (p == QShaderDescription::FractionalOddTessellationPartitioning)
        return MTLTessellationPartitionModeFractionalOdd;

    // EqualTessellationPartitioning and anything else
    return MTLTessellationPartitionModePow2;
}
4860
// Converts the two-digit shader version number (tens digit and ones digit,
// e.g. 23) to an MTLLanguageVersion value: the tens digit goes into the bits
// above bit 16 and the ones digit is added on top.
static inline MTLLanguageVersion toMetalLanguageVersion(const QShaderVersion &version)
{
    const int packed = version.version();
    const int upperPart = packed / 10;
    const int lowerPart = packed % 10;
    return MTLLanguageVersion((upperPart << 16) + lowerPart);
}
4866
// Creates an MTLLibrary for the given variant of a baked shader.
//
// A precompiled metallib from the QShader is tried first; when there is none,
// or it fails to load, the MSL source is compiled instead. In both cases the
// candidate language versions are tried from newest to oldest and the first
// one present in the QShader is used.
//
// On success the (owned, +1) library is returned, and *entryPoint and
// *activeKey are set to the entry point and the key of the shader that was
// used. On failure nil is returned; *error is set only when compiling the MSL
// source failed.
id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
                                             QString *error, QByteArray *entryPoint, QShaderKey *activeKey)
{
    QVarLengthArray<int, 8> versions;
    if (@available(macOS 13, iOS 16, *))
        versions << 30;
    if (@available(macOS 12, iOS 15, *))
        versions << 24;
    versions << 23 << 22 << 21 << 20 << 12;

    const QList<QShaderKey> shaders = shader.availableShaders();

    // Returns the key for the first version in 'versions' that the shader
    // pack contains for the given source type. When nothing matches, the key
    // for the last version tried is returned; looking that up yields an empty
    // shader, which the callers below check for.
    const auto pickKey = [&versions, &shaders, shaderVariant](QShader::Source source) {
        QShaderKey candidate;
        for (const int &version : versions) {
            candidate = { source, version, shaderVariant };
            if (shaders.contains(candidate))
                break;
        }
        return candidate;
    };

    QShaderKey key = pickKey(QShader::Source::MetalLibShader);

    QShaderCode mtllib = shader.shader(key);
    if (!mtllib.shader().isEmpty()) {
        dispatch_data_t data = dispatch_data_create(mtllib.shader().constData(),
                                                    size_t(mtllib.shader().size()),
                                                    dispatch_get_global_queue(0, 0),
                                                    DISPATCH_DATA_DESTRUCTOR_DEFAULT);
        NSError *err = nil;
        id<MTLLibrary> lib = [dev newLibraryWithData: data error: &err];
        dispatch_release(data);
        // Success is signalled by the return value, not by the error object:
        // testing err instead would leak a valid library that came with a
        // non-nil error, and would report a nil library as success when no
        // error object was provided.
        if (lib) {
            *entryPoint = mtllib.entryPoint();
            *activeKey = key;
            return lib;
        }
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("Failed to load metallib from baked shader: %s", qPrintable(msg));
        // fall through to compiling from source
    }

    key = pickKey(QShader::Source::MslShader);

    QShaderCode mslSource = shader.shader(key);
    if (mslSource.shader().isEmpty()) {
        qWarning() << "No MSL 2.0 or 1.2 code found in baked shader" << shader;
        return nil;
    }

    NSString *src = [NSString stringWithUTF8String: mslSource.shader().constData()];
    MTLCompileOptions *opts = [[MTLCompileOptions alloc] init];
    opts.languageVersion = toMetalLanguageVersion(key.sourceVersion());
    NSError *err = nil;
    id<MTLLibrary> lib = [dev newLibraryWithSource: src options: opts error: &err];
    [opts release];
    // src is autoreleased

    // if lib is null and err is non-null, we had errors (fail)
    // if lib is non-null and err is non-null, we had warnings (success)
    // if lib is non-null and err is null, there were no errors or warnings (success)
    if (!lib) {
        const QString msg = QString::fromNSString(err.localizedDescription);
        *error = msg;
        return nil;
    }

    *entryPoint = mslSource.entryPoint();
    *activeKey = key;
    return lib;
}
4939
// Looks up the function named by entryPoint in the library. The result comes
// from a new* method, so the caller owns it; it is nil when the lookup fails.
id<MTLFunction> QRhiMetalData::createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint)
{
    NSString *functionName = [NSString stringWithUTF8String: entryPoint.constData()];
    id<MTLFunction> function = [lib newFunctionWithName: functionName];
    return function;
}
4944
// Fills in the attachment related parts of a render pipeline descriptor:
// per color attachment pixel format, blending and write mask, the
// depth/stencil attachment formats, and the raster sample count.
//
// metalRpDesc must point to an MTLRenderPipelineDescriptor; rpD supplies the
// attachment formats.
void QMetalGraphicsPipeline::setupAttachmentsInMetalRenderPassDescriptor(void *metalRpDesc, QMetalRenderPassDescriptor *rpD)
{
    MTLRenderPipelineDescriptor *rpDesc = reinterpret_cast<MTLRenderPipelineDescriptor *>(metalRpDesc);

    if (rpD->colorAttachmentCount) {
        // defaults when no targetBlends are provided
        MTLRenderPipelineColorAttachmentDescriptor *firstAtt = rpDesc.colorAttachments[0];
        firstAtt.pixelFormat = MTLPixelFormat(rpD->colorFormat[0]);
        firstAtt.writeMask = MTLColorWriteMaskAll;
        firstAtt.blendingEnabled = false;

        Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount
                 || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1));

        const uint blendCount = uint(m_targetBlends.count());
        for (uint attIdx = 0; attIdx != blendCount; ++attIdx) {
            const QRhiGraphicsPipeline::TargetBlend &blend(m_targetBlends[int(attIdx)]);
            MTLRenderPipelineColorAttachmentDescriptor *att = rpDesc.colorAttachments[attIdx];
            att.pixelFormat = MTLPixelFormat(rpD->colorFormat[attIdx]);
            att.blendingEnabled = blend.enable;
            att.sourceRGBBlendFactor = toMetalBlendFactor(blend.srcColor);
            att.destinationRGBBlendFactor = toMetalBlendFactor(blend.dstColor);
            att.rgbBlendOperation = toMetalBlendOp(blend.opColor);
            att.sourceAlphaBlendFactor = toMetalBlendFactor(blend.srcAlpha);
            att.destinationAlphaBlendFactor = toMetalBlendFactor(blend.dstAlpha);
            att.alphaBlendOperation = toMetalBlendOp(blend.opAlpha);
            att.writeMask = toMetalColorWriteMask(blend.colorWrite);
        }
    }

    if (rpD->hasDepthStencil) {
        // Must only be set when a depth-stencil buffer will actually be bound,
        // validation blows up otherwise.
        const MTLPixelFormat dsFormat = MTLPixelFormat(rpD->dsFormat);
        rpDesc.depthAttachmentPixelFormat = dsFormat;

        // The stencil attachment format is left untouched for the formats
        // listed here.
#if defined(Q_OS_MACOS)
        const bool skipStencilFormat = dsFormat == MTLPixelFormatDepth16Unorm
                || dsFormat == MTLPixelFormatDepth32Float;
#else
        const bool skipStencilFormat = dsFormat == MTLPixelFormatDepth32Float;
#endif
        if (!skipStencilFormat)
            rpDesc.stencilAttachmentPixelFormat = dsFormat;
    }

    QRHI_RES_RHI(QRhiMetal);
    const int effectiveSamples = rhiD->effectiveSampleCount(m_sampleCount);
    rpDesc.rasterSampleCount = NSUInteger(effectiveSamples);
}
4988
// Fills in a depth-stencil descriptor from the pipeline's depth and stencil
// settings.
//
// metalDsDesc must point to an MTLDepthStencilDescriptor. With depth testing
// off the compare function is set to "always". The front and back face
// stencil descriptors are only set when stencil testing is enabled; both use
// the same read and write masks.
void QMetalGraphicsPipeline::setupMetalDepthStencilDescriptor(void *metalDsDesc)
{
    MTLDepthStencilDescriptor *dsDesc = reinterpret_cast<MTLDepthStencilDescriptor *>(metalDsDesc);

    dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways;
    dsDesc.depthWriteEnabled = m_depthWrite;
    if (!m_stencilTest)
        return;

    // Copies one face's stencil state into a stencil descriptor.
    const auto fillStencil = [this](MTLStencilDescriptor *sd, const auto &state) {
        sd.stencilFailureOperation = toMetalStencilOp(state.failOp);
        sd.depthFailureOperation = toMetalStencilOp(state.depthFailOp);
        sd.depthStencilPassOperation = toMetalStencilOp(state.passOp);
        sd.stencilCompareFunction = toMetalCompareOp(state.compareOp);
        sd.readMask = m_stencilReadMask;
        sd.writeMask = m_stencilWriteMask;
    };

    // frontFaceStencil and backFaceStencil are 'copy' properties, so the
    // descriptor keeps its own copy of whatever is assigned. Without ARC the
    // alloc/init'ed object therefore has to be released here, otherwise it
    // leaks. It is fully configured before the assignment so that the copy
    // carries the final values.
    MTLStencilDescriptor *frontStencil = [[MTLStencilDescriptor alloc] init];
    fillStencil(frontStencil, m_stencilFront);
    dsDesc.frontFaceStencil = frontStencil;
    [frontStencil release];

    MTLStencilDescriptor *backStencil = [[MTLStencilDescriptor alloc] init];
    fillStencil(backStencil, m_stencilBack);
    dsDesc.backFaceStencil = backStencil;
    [backStencil release];
}
5013
// Converts the pipeline's rasterizer settings (front face winding, cull mode,
// polygon mode, depth bias values) to their Metal representation and stores
// them in the private data.
void QMetalGraphicsPipeline::mapStates()
{
    if (m_frontFace == CCW)
        d->winding = MTLWindingCounterClockwise;
    else
        d->winding = MTLWindingClockwise;

    d->cullMode = toMetalCullMode(m_cullMode);
    d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode);

    d->slopeScaledDepthBias = m_slopeScaledDepthBias;
    d->depthBias = float(m_depthBias);
}
5022
// Fills in the attributes and buffer layouts of a vertex descriptor from the
// pipeline's vertex input layout.
//
// Vertex buffers are placed after the shader resource bindings: the buffer
// index of vertex binding N is (maxBinding + 1) + N. For per-instance
// bindings the step rate is multiplied by the multiview count, which is
// taken as at least 1.
void QMetalGraphicsPipelineData::setupVertexInputDescriptor(MTLVertexDescriptor *desc)
{
    // same binding space for vertex and constant buffers - work it around
    // should be in native resource binding not SPIR-V, but this will work anyway
    QMetalShaderResourceBindings *srbD = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings());
    const int firstVertexBinding = srbD->maxBinding + 1;

    QRhiVertexInputLayout inputLayout = q->vertexInputLayout();

    auto attrIt = inputLayout.cbeginAttributes();
    const auto attrEnd = inputLayout.cendAttributes();
    for ( ; attrIt != attrEnd; ++attrIt) {
        MTLVertexAttributeDescriptor *attr = desc.attributes[uint(attrIt->location())];
        attr.format = toMetalAttributeFormat(attrIt->format());
        attr.offset = NSUInteger(attrIt->offset());
        attr.bufferIndex = NSUInteger(firstVertexBinding + attrIt->binding());
    }

    const NSUInteger viewCount = qMax<NSUInteger>(1, q->multiViewCount());

    int bindingIndex = 0;
    auto bindingIt = inputLayout.cbeginBindings();
    const auto bindingEnd = inputLayout.cendBindings();
    for ( ; bindingIt != bindingEnd; ++bindingIt, ++bindingIndex) {
        MTLVertexBufferLayoutDescriptor *layout = desc.layouts[uint(firstVertexBinding + bindingIndex)];

        const bool perInstance = bindingIt->classification() == QRhiVertexInputBinding::PerInstance;
        layout.stepFunction = perInstance ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;

        NSUInteger stepRate = NSUInteger(bindingIt->instanceStepRate());
        if (perInstance)
            stepRate *= viewCount;
        layout.stepRate = stepRate;

        layout.stride = bindingIt->stride();
    }
}
5053
// Fills in the attributes and buffer layouts of a stage input descriptor from
// the pipeline's vertex input layout.
//
// Buffer indices start after the shader resource bindings, at maxBinding + 1.
// Per-instance bindings step with the Y thread position in the grid.
// Per-vertex bindings step with the X position, using the indexed variant
// when desc.indexBufferIndex is non-zero.
void QMetalGraphicsPipelineData::setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc)
{
    // same binding space for vertex and constant buffers - work it around
    // should be in native resource binding not SPIR-V, but this will work anyway
    QMetalShaderResourceBindings *srbD = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings());
    const int firstVertexBinding = srbD->maxBinding + 1;

    QRhiVertexInputLayout inputLayout = q->vertexInputLayout();

    auto attrIt = inputLayout.cbeginAttributes();
    const auto attrEnd = inputLayout.cendAttributes();
    for ( ; attrIt != attrEnd; ++attrIt) {
        MTLAttributeDescriptor *attr = desc.attributes[uint(attrIt->location())];
        attr.format = MTLAttributeFormat(toMetalAttributeFormat(attrIt->format()));
        attr.offset = NSUInteger(attrIt->offset());
        attr.bufferIndex = NSUInteger(firstVertexBinding + attrIt->binding());
    }

    // nothing in the loop below modifies the index buffer index
    const bool useIndexedStep = desc.indexBufferIndex != 0;

    int bindingIndex = 0;
    auto bindingIt = inputLayout.cbeginBindings();
    const auto bindingEnd = inputLayout.cendBindings();
    for ( ; bindingIt != bindingEnd; ++bindingIt, ++bindingIndex) {
        MTLBufferLayoutDescriptor *layout = desc.layouts[uint(firstVertexBinding + bindingIndex)];

        MTLStepFunction stepFunction;
        if (bindingIt->classification() == QRhiVertexInputBinding::PerInstance)
            stepFunction = MTLStepFunctionThreadPositionInGridY;
        else if (useIndexedStep)
            stepFunction = MTLStepFunctionThreadPositionInGridXIndexed;
        else
            stepFunction = MTLStepFunctionThreadPositionInGridX;

        layout.stepFunction = stepFunction;
        layout.stepRate = NSUInteger(bindingIt->instanceStepRate());
        layout.stride = bindingIt->stride();
    }
}
5087
// When a binary archive is present, sets it as the only entry of the
// descriptor's binaryArchives list. Does nothing otherwise.
void QRhiMetalData::trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
{
    if (!binArch)
        return;

    // autoreleased
    NSArray *archiveList = [NSArray arrayWithObject: binArch];
    rpDesc.binaryArchives = archiveList;
}
5095
// When a binary archive is present, adds the functions of the render pipeline
// described by rpDesc to it. A failure is only reported via a warning.
void QRhiMetalData::addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
{
    if (!binArch)
        return;

    NSError *err = nil;
    const bool added = [binArch addRenderPipelineFunctionsWithDescriptor: rpDesc error: &err];
    if (added)
        return;

    const QString msg = QString::fromNSString(err.localizedDescription);
    qWarning("Failed to collect render pipeline functions to binary archive: %s", qPrintable(msg));
}
5106
// Builds the render pipeline state and the depth-stencil state for a
// pipeline that consists of vertex and fragment stages.
//
// For each shader stage the library/function pair is taken from the QRhiMetal
// shader cache when present. Otherwise it is created via createMetalLib() and
// createMSLShaderFunction() and inserted into the cache. The cache and the
// pipeline each hold their own reference to the library and function.
//
// Returns false, after printing a warning, when a shader library cannot be
// created, when the entry point is not found in it, or when creating the
// render pipeline state fails.
bool QMetalGraphicsPipeline::createVertexFragmentPipeline()
{
    QRHI_RES_RHI(QRhiMetal);

    // autoreleased
    MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];
    d->setupVertexInputDescriptor(vertexDesc);

    // owned: every exit path below must release it
    MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
    rpDesc.vertexDescriptor = vertexDesc;

    // Mutability cannot be determined (slotted buffers could be set as
    // MTLMutabilityImmutable, but then we potentially need a different
    // descriptor for each buffer combination as this depends on the actual
    // buffers not just the resource binding layout), so leave
    // rpDesc.vertex/fragmentBuffers at the defaults.

    for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
        auto cacheIt = rhiD->d->shaderCache.constFind(shaderStage);
        if (cacheIt != rhiD->d->shaderCache.constEnd()) {
            // cache hit: take an extra reference for this pipeline
            switch (shaderStage.type()) {
            case QRhiShaderStage::Vertex:
                d->vs = *cacheIt;
                [d->vs.lib retain];
                [d->vs.func retain];
                rpDesc.vertexFunction = d->vs.func;
                break;
            case QRhiShaderStage::Fragment:
                d->fs = *cacheIt;
                [d->fs.lib retain];
                [d->fs.func retain];
                rpDesc.fragmentFunction = d->fs.func;
                break;
            default:
                break;
            }
        } else {
            const QShader shader = shaderStage.shader();
            QString error;
            QByteArray entryPoint;
            QShaderKey activeKey;
            id<MTLLibrary> lib = rhiD->d->createMetalLib(shader, shaderStage.shaderVariant(),
                                                         &error, &entryPoint, &activeKey);
            if (!lib) {
                qWarning("MSL shader compilation failed: %s", qPrintable(error));
                [rpDesc release]; // do not leak the descriptor on failure
                return false;
            }
            id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint);
            if (!func) {
                qWarning("MSL function for entry point %s not found", entryPoint.constData());
                [lib release];
                [rpDesc release]; // do not leak the descriptor on failure
                return false;
            }
            if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) {
                // Use the simplest strategy: too many cached shaders -> drop them all.
                for (QMetalShader &s : rhiD->d->shaderCache)
                    s.destroy();
                rhiD->d->shaderCache.clear();
            }
            switch (shaderStage.type()) {
            case QRhiShaderStage::Vertex:
                d->vs.lib = lib;
                d->vs.func = func;
                d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
                d->vs.desc = shader.description();
                d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
                // the cache entry takes over the +1 from creation, the
                // retains below are the pipeline's own references
                rhiD->d->shaderCache.insert(shaderStage, d->vs);
                [d->vs.lib retain];
                [d->vs.func retain];
                rpDesc.vertexFunction = func;
                break;
            case QRhiShaderStage::Fragment:
                d->fs.lib = lib;
                d->fs.func = func;
                d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
                d->fs.desc = shader.description();
                d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
                rhiD->d->shaderCache.insert(shaderStage, d->fs);
                [d->fs.lib retain];
                [d->fs.func retain];
                rpDesc.fragmentFunction = func;
                break;
            default:
                // stage is not used here, drop what was just created
                [func release];
                [lib release];
                break;
            }
        }
    }

    QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, m_renderPassDesc);
    setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);

    if (m_multiViewCount >= 2)
        rpDesc.inputPrimitiveTopology = toMetalPrimitiveTopologyClass(m_topology);

    rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);

    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
        rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);

    NSError *err = nil;
    d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
    [rpDesc release];
    if (!d->ps) {
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
        return false;
    }

    MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
    setupMetalDepthStencilDescriptor(dsDesc);
    d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
    [dsDesc release];

    d->primitiveType = toMetalPrimitiveType(m_topology);
    mapStates();

    return true;
}
5226
// Maps a vertex-as-compute shader variant to its array slot: 0 for the
// non-indexed, 1 for the 32-bit indexed and 2 for the 16-bit indexed variant.
// Returns -1 for any other variant.
int QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(QShader::Variant vertexCompVariant)
{
    if (vertexCompVariant == QShader::NonIndexedVertexAsComputeShader)
        return 0;
    if (vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader)
        return 1;
    if (vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader)
        return 2;

    return -1;
}
5241
// Returns the compute pipeline state for the given vertex-as-compute shader
// variant, creating it on first use and storing it in vertexComputeState.
//
// Returns nil, after printing a warning, when there is no compute function
// for the variant or when creating the pipeline state fails.
id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant)
{
    const int slot = vsCompVariantToIndex(vertexCompVariant);
    const bool slotValid = slot >= 0;

    // already built earlier?
    if (slotValid && vertexComputeState[slot])
        return vertexComputeState[slot];

    id<MTLFunction> computeFunc = slotValid ? compVs[slot].func : nil;
    if (!computeFunc) {
        qWarning("No compute function found for vertex shader translated for tessellation, this should not happen");
        return nil;
    }

    const QMap<int, int> &extraBindings(compVs[slot].nativeShaderInfo.extraBufferBindings);
    const int indexBufferBinding = extraBindings.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);

    MTLComputePipelineDescriptor *cpDesc = [[MTLComputePipelineDescriptor alloc] init];
    cpDesc.computeFunction = computeFunc;
    cpDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES;
    cpDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor];

    // The index type and index buffer index are only set for the two indexed
    // variants, and only when the shader reports an index buffer binding.
    const bool indexedWith32Bit = vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader;
    const bool indexedWith16Bit = vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader;
    if (indexBufferBinding >= 0 && (indexedWith32Bit || indexedWith16Bit)) {
        cpDesc.stageInputDescriptor.indexType = indexedWith32Bit ? MTLIndexTypeUInt32 : MTLIndexTypeUInt16;
        cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding;
    }
    q->setupStageInputDescriptor(cpDesc.stageInputDescriptor);

    rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);

    const bool collectToArchive = rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave);
    if (collectToArchive)
        rhiD->d->addComputePipelineToBinaryArchive(cpDesc);

    NSError *err = nil;
    id<MTLComputePipelineState> pipelineState =
            [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
                                                        options: MTLPipelineOptionNone
                                                     reflection: nil
                                                          error: &err];
    [cpDesc release];

    if (pipelineState) {
        vertexComputeState[slot] = pipelineState;
    } else {
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
    }

    // not retained, the only owner is vertexComputeState and so the QRhiGraphicsPipeline
    return pipelineState;
}
5295
// Returns the compute pipeline state that runs the tessellation control
// shader's compute function (compTesc.func). The state is created on first use
// and cached in tessControlComputeState. On failure a warning is printed and
// nil is returned.
id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::tescCompPipeline(QRhiMetal *rhiD)
{
    if (tessControlComputeState)
        return tessControlComputeState;

    MTLComputePipelineDescriptor *cpDesc = [[MTLComputePipelineDescriptor alloc] init];
    cpDesc.computeFunction = compTesc.func;

    rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);

    const bool savePipelineCacheData = rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave);
    if (savePipelineCacheData)
        rhiD->d->addComputePipelineToBinaryArchive(cpDesc);

    NSError *err = nil;
    id<MTLComputePipelineState> pipelineState = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
                                                                                            options: MTLPipelineOptionNone
                                                                                         reflection: nil
                                                                                              error: &err];
    [cpDesc release];

    if (pipelineState) {
        tessControlComputeState = pipelineState;
    } else {
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
    }

    // not retained, the only owner is tessControlComputeState and so the QRhiGraphicsPipeline
    return pipelineState;
}
5324
// Reports whether bit number `index` is set in the `indices` bit mask.
static inline bool indexTaken(quint32 index, quint64 indices)
{
    const quint64 shifted = indices >> index;
    return (shifted & 0x1) != 0;
}
5329
// Sets bit number `index` in the `indices` bit mask.
static inline void takeIndex(quint32 index, quint64 &indices)
{
    // The shifted operand must be 64 bits wide: with a plain int literal,
    // bit 31 would be sign-extended into the upper half of the mask and
    // shifting by 32 or more would be undefined.
    indices |= quint64(1) << index;
}
5334
// Returns the lowest vertex attribute index whose bit is not yet set in the
// `indices` bit mask.
static inline int nextAttributeIndex(quint64 indices)
{
    // Maximum number of vertex attributes per vertex descriptor. There does
    // not appear to be a way to query this from the implementation.
    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf indicates
    // that all GPU families have a value of 31.
    static const int maxVertexAttributes = 31;

    int candidate = 0;
    while (candidate < maxVertexAttributes) {
        if (!indexTaken(candidate, indices))
            return candidate;
        ++candidate;
    }

    // every index below the maximum is in use
    Q_UNREACHABLE_RETURN(-1);
}
5350
// Rounds `offset` up to the next multiple of `alignment`. An alignment of 0
// means "no alignment requirement" and returns `offset` unchanged instead of
// dividing by zero.
static inline int aligned(quint32 offset, quint32 alignment)
{
    if (alignment == 0)
        return int(offset);

    return ((offset + alignment - 1) / alignment) * alignment;
}
5355
// Advances `offset` past the storage of `variable` without emitting any vertex
// attribute for it, and raises `vertexAlignment` to the largest member
// alignment encountered. Struct variables are walked member by member, once
// per array element.
template<typename T>
static void addUnusedVertexAttribute(const T &variable, QRhiMetal *rhiD, quint32 &offset, quint32 &vertexAlignment)
{
    // total element count across all array dimensions (1 for non-arrays)
    int elementCount = 1;
    for (const int dim : variable.arrayDims)
        elementCount *= dim;

    if (variable.type == QShaderDescription::VariableType::Struct) {
        for (int i = 0; i < elementCount; ++i) {
            for (const auto &member : variable.structMembers)
                addUnusedVertexAttribute(member, rhiD, offset, vertexAlignment);
        }
        return;
    }

    const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
    const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);

    // MSL specification 3.0 says alignment = size for non packed scalars and vectors
    const quint32 alignment = size;
    vertexAlignment = std::max(vertexAlignment, alignment);

    for (int i = 0; i < elementCount; ++i) {
        // align the start of this element, then step over it
        offset = aligned(offset, alignment);
        offset += size;
    }
}
5385
// Emits one vertex attribute per scalar/vector element of `variable` into
// `attributes`, sourcing from buffer `binding` at the running `offset`.
// `index` is the attribute index to use next and is advanced as attributes are
// emitted; `indices` is the bit mask of attribute indices in use, and
// `vertexAlignment` is raised to the largest member alignment encountered.
// Struct variables are walked member by member, once per array element.
template<typename T>
static void addVertexAttribute(const T &variable, int binding, QRhiMetal *rhiD, int &index, quint32 &offset, MTLVertexAttributeDescriptorArray *attributes, quint64 &indices, quint32 &vertexAlignment)
{
    // total element count across all array dimensions (1 for non-arrays)
    int elementCount = 1;
    for (const int dim : variable.arrayDims)
        elementCount *= dim;

    if (variable.type == QShaderDescription::VariableType::Struct) {
        for (int i = 0; i < elementCount; ++i) {
            for (const auto &member : variable.structMembers)
                addVertexAttribute(member, binding, rhiD, index, offset, attributes, indices, vertexAlignment);
        }
        return;
    }

    const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
    const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);

    // MSL specification 3.0 says alignment = size for non packed scalars and vectors
    const quint32 alignment = size;
    vertexAlignment = std::max(vertexAlignment, alignment);

    for (int i = 0; i < elementCount; ++i) {
        Q_ASSERT(!indexTaken(index, indices));

        // align the start of this element
        offset = aligned(offset, alignment);

        MTLVertexAttributeDescriptor *attribute = attributes[index];
        attribute.bufferIndex = binding;
        attribute.format = toMetalAttributeFormat(format);
        attribute.offset = offset;

        // mark the index as used and move on to the following one, skipping
        // ahead to the lowest free index if that one is occupied already
        takeIndex(index, indices);
        ++index;
        if (indexTaken(index, indices))
            index = nextAttributeIndex(indices);

        offset += size;
    }
}
5427
// Returns true when both block variable lists have the same length and every
// pair of corresponding entries agrees in type, array dimensions and
// (recursively) struct members.
static inline bool matches(const QList<QShaderDescription::BlockVariable> &a, const QList<QShaderDescription::BlockVariable> &b)
{
    if (a.size() != b.size())
        return false;

    for (int i = 0; i < a.size(); ++i) {
        const QShaderDescription::BlockVariable &lhs = a[i];
        const QShaderDescription::BlockVariable &rhs = b[i];

        if (!(lhs.type == rhs.type))
            return false;
        if (!(lhs.arrayDims == rhs.arrayDims))
            return false;
        if (!matches(lhs.structMembers, rhs.structMembers))
            return false;
    }

    return true;
}
5442
// Returns true when two in/out variables agree in location, type, per-patch
// flag and struct members. Array dimensions are not compared here.
static inline bool matches(const QShaderDescription::InOutVariable &a, const QShaderDescription::InOutVariable &b)
{
    if (!(a.location == b.location))
        return false;
    if (!(a.type == b.type))
        return false;
    if (!(a.perPatch == b.perPatch))
        return false;

    return matches(a.structMembers, b.structMembers);
}
5450
5451//
5452// Create the tessellation evaluation render pipeline state
5453//
5454// The tesc runs as a compute shader in a compute pipeline and writes per patch and per patch
5455// control point data into separate storage buffers. The tese runs as a vertex shader in a render
5456// pipeline. Our task is to generate a render pipeline descriptor for the tese that pulls vertices
5457// from these buffers.
5458//
5459// As the buffers we are pulling vertices from are written by a compute pipeline, they follow the
5460// MSL alignment conventions which we must take into account when generating our
5461// MTLVertexDescriptor. We must include the user defined tese input attributes, and any builtins
5462// that were used.
5463//
5464// SPIRV-Cross generates the MSL tese shader code with input attribute indices that reflect the
5465// specified GLSL locations. Interface blocks are flattened with each member having an incremented
5466// attribute index. SPIRV-Cross reports an error on compilation if there are clashes in the index
5467// address space.
5468//
5469// After the user specified attributes are processed, SPIRV-Cross places the in-use builtins at the
5470// next available (lowest value) attribute index. Tese builtins are processed in the following
5471// order:
5472//
5473// in gl_PerVertex
5474// {
5475// vec4 gl_Position;
5476// float gl_PointSize;
5477// float gl_ClipDistance[];
5478// };
5479//
5480// patch in float gl_TessLevelOuter[4];
5481// patch in float gl_TessLevelInner[2];
5482//
5483// Enumerations in QShaderDescription::BuiltinType are defined in this order.
5484//
5485// For quads, SPIRV-Cross places MTLQuadTessellationFactorsHalf per patch in the tessellation
5486// factor buffer. For triangles it uses MTLTriangleTessellationFactorsHalf.
5487//
5488// It should be noted that SPIRV-Cross handles the following builtin inputs internally, with no
5489// host side support required.
5490//
5491// in vec3 gl_TessCoord;
5492// in int gl_PatchVerticesIn;
5493// in int gl_PrimitiveID;
5494//
id<MTLRenderPipelineState> QMetalGraphicsPipelineData::Tessellation::teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline)
{
    // Returns the render pipeline state for the tessellation evaluation +
    // fragment stages. It is built once and then stored in pipeline->d->ps;
    // subsequent calls return the stored state. On failure a warning is
    // printed and nil is returned.
    if (pipeline->d->ps)
        return pipeline->d->ps;

    MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
    MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];

    // tesc output buffers
    const QMap<int, int> &ebb(compTesc.nativeShaderInfo.extraBufferBindings);
    const int tescOutputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
    const int tescPatchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
    const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
    quint32 offsetInTescOutput = 0;
    quint32 offsetInTescPatchOutput = 0;
    quint32 offsetInTessFactorBuffer = 0;
    quint32 tescOutputAlignment = 0;
    quint32 tescPatchOutputAlignment = 0;
    quint32 tessFactorAlignment = 0;
    QSet<int> usedBuffers;

    // tesc output variables in ascending location order
    QMap<int, QShaderDescription::InOutVariable> tescOutVars;
    for (const auto &tescOutVar : compTesc.desc.outputVariables())
        tescOutVars[tescOutVar.location] = tescOutVar;

    // tese input variables in ascending location order
    QMap<int, QShaderDescription::InOutVariable> teseInVars;
    for (const auto &teseInVar : vertTese.desc.inputVariables())
        teseInVars[teseInVar.location] = teseInVar;

    // bit mask tracking usage of vertex attribute indices
    quint64 indices = 0;

    for (QShaderDescription::InOutVariable &tescOutVar : tescOutVars) {

        int index = tescOutVar.location;
        int binding = -1;
        quint32 *offset = nullptr;
        quint32 *alignment = nullptr;

        if (tescOutVar.perPatch) {
            binding = tescPatchOutputBufferBinding;
            offset = &offsetInTescPatchOutput;
            alignment = &tescPatchOutputAlignment;
        } else {
            // Strip the last array dimension (presumably the per control
            // point one) so that only the per-element layout remains.
            // removeLast() must not be called on an empty list, so guard
            // against a description that reports no array dimensions.
            if (!tescOutVar.arrayDims.isEmpty())
                tescOutVar.arrayDims.removeLast();
            binding = tescOutputBufferBinding;
            offset = &offsetInTescOutput;
            alignment = &tescOutputAlignment;
        }

        if (teseInVars.contains(index)) {

            if (!matches(teseInVars[index], tescOutVar)) {
                qWarning() << "mismatched tessellation control output -> tessellation evaluation input at location" << index;
                qWarning() << "    tesc out:" << tescOutVar;
                qWarning() << "    tese in:" << teseInVars[index];
            }

            if (binding != -1) {
                addVertexAttribute(tescOutVar, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
                usedBuffers << binding;
            } else {
                qWarning() << "baked tessellation control shader missing output buffer binding information";
                addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
            }

        } else {
            // The output still occupies space in the buffer, so the offset
            // has to advance even though no attribute is generated for it.
            qWarning() << "missing tessellation evaluation input for tessellation control output:" << tescOutVar;
            addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
        }

        teseInVars.remove(tescOutVar.location);
    }

    // whatever is left has no matching tesc output
    for (const QShaderDescription::InOutVariable &teseInVar : teseInVars)
        qWarning() << "missing tessellation control output for tessellation evaluation input:" << teseInVar;

    // tesc output builtins in ascending builtin type order
    QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> tescOutBuiltins;
    for (const auto &tescOutBuiltin : compTesc.desc.outputBuiltinVariables())
        tescOutBuiltins[tescOutBuiltin.type] = tescOutBuiltin;

    // tese input builtins in ascending builtin type order
    QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> teseInBuiltins;
    for (const auto &teseInBuiltin : vertTese.desc.inputBuiltinVariables())
        teseInBuiltins[teseInBuiltin.type] = teseInBuiltin;

    const bool trianglesMode = vertTese.desc.tessellationMode() == QShaderDescription::TrianglesTessellationMode;
    bool tessLevelAdded = false;

    for (const QShaderDescription::BuiltinVariable &builtin : tescOutBuiltins) {

        QShaderDescription::InOutVariable variable;
        int binding = -1;
        quint32 *offset = nullptr;
        quint32 *alignment = nullptr;

        switch (builtin.type) {
        case QShaderDescription::BuiltinType::PositionBuiltin:
            variable.type = QShaderDescription::VariableType::Vec4;
            binding = tescOutputBufferBinding;
            offset = &offsetInTescOutput;
            alignment = &tescOutputAlignment;
            break;
        case QShaderDescription::BuiltinType::PointSizeBuiltin:
            variable.type = QShaderDescription::VariableType::Float;
            binding = tescOutputBufferBinding;
            offset = &offsetInTescOutput;
            alignment = &tescOutputAlignment;
            break;
        case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
            variable.type = QShaderDescription::VariableType::Float;
            variable.arrayDims = builtin.arrayDims;
            binding = tescOutputBufferBinding;
            offset = &offsetInTescOutput;
            alignment = &tescOutputAlignment;
            break;
        case QShaderDescription::BuiltinType::TessLevelOuterBuiltin:
            variable.type = QShaderDescription::VariableType::Half4;
            binding = tessFactorBufferBinding;
            offset = &offsetInTessFactorBuffer;
            // in triangles mode a single Half4 covers the tessellation
            // factors, so the inner level must not add another attribute
            tessLevelAdded = trianglesMode;
            alignment = &tessFactorAlignment;
            break;
        case QShaderDescription::BuiltinType::TessLevelInnerBuiltin:
            if (trianglesMode) {
                if (!tessLevelAdded) {
                    variable.type = QShaderDescription::VariableType::Half4;
                    binding = tessFactorBufferBinding;
                    offsetInTessFactorBuffer = 0;
                    offset = &offsetInTessFactorBuffer;
                    alignment = &tessFactorAlignment;
                    tessLevelAdded = true;
                } else {
                    teseInBuiltins.remove(builtin.type);
                    continue;
                }
            } else {
                // the inner levels are read starting at byte offset 8 of the
                // per-patch tessellation factor data
                variable.type = QShaderDescription::VariableType::Half2;
                binding = tessFactorBufferBinding;
                offsetInTessFactorBuffer = 8;
                offset = &offsetInTessFactorBuffer;
                alignment = &tessFactorAlignment;
            }
            break;
        default:
            Q_UNREACHABLE();
            break;
        }

        if (teseInBuiltins.contains(builtin.type)) {
            if (binding != -1) {
                int index = nextAttributeIndex(indices);
                addVertexAttribute(variable, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
                usedBuffers << binding;
            } else {
                qWarning() << "baked tessellation control shader missing output buffer binding information";
                addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
            }
        } else {
            addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
        }

        teseInBuiltins.remove(builtin.type);
    }

    // Remaining tese builtin inputs were not written by the tesc. Only the
    // gl_PerVertex members are reported.
    for (const QShaderDescription::BuiltinVariable &builtin : teseInBuiltins) {
        switch (builtin.type) {
        case QShaderDescription::BuiltinType::PositionBuiltin:
        case QShaderDescription::BuiltinType::PointSizeBuiltin:
        case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
            qWarning() << "missing tessellation control output for tessellation evaluation builtin input:" << builtin;
            break;
        default:
            break;
        }
    }

    // Describe the layout of every buffer that at least one attribute reads from.
    if (usedBuffers.contains(tescOutputBufferBinding)) {
        vertexDesc.layouts[tescOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatchControlPoint;
        vertexDesc.layouts[tescOutputBufferBinding].stride = aligned(offsetInTescOutput, tescOutputAlignment);
    }

    if (usedBuffers.contains(tescPatchOutputBufferBinding)) {
        vertexDesc.layouts[tescPatchOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
        vertexDesc.layouts[tescPatchOutputBufferBinding].stride = aligned(offsetInTescPatchOutput, tescPatchOutputAlignment);
    }

    if (usedBuffers.contains(tessFactorBufferBinding)) {
        vertexDesc.layouts[tessFactorBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
        vertexDesc.layouts[tessFactorBufferBinding].stride = trianglesMode ? sizeof(MTLTriangleTessellationFactorsHalf) : sizeof(MTLQuadTessellationFactorsHalf);
    }

    rpDesc.vertexDescriptor = vertexDesc;
    rpDesc.vertexFunction = vertTese.func;
    rpDesc.fragmentFunction = pipeline->d->fs.func;

    // The portable, cross-API approach is to use CCW, the results are then
    // identical (assuming the applied clipSpaceCorrMatrix) for all the 3D
    // APIs. The tess.eval. GLSL shader is thus expected to specify ccw. If it
    // doesn't, things may not work as expected.
    rpDesc.tessellationOutputWindingOrder = toMetalTessellationWindingOrder(vertTese.desc.tessellationWindingOrder());

    rpDesc.tessellationPartitionMode = toMetalTessellationPartitionMode(vertTese.desc.tessellationPartitioning());

    QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, pipeline->renderPassDescriptor());
    pipeline->setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);

    rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);

    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
        rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);

    NSError *err = nil;
    id<MTLRenderPipelineState> ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
    [rpDesc release];
    if (!ps) {
        const QString msg = QString::fromNSString(err.localizedDescription);
        qWarning("Failed to create render pipeline state for tessellation: %s", qPrintable(msg));
    } else {
        // ps is stored in the QMetalGraphicsPipelineData so the end result in this
        // regard is no different from what createVertexFragmentPipeline does
        pipeline->d->ps = ps;
    }
    return ps;
}
5723
// Hands out a work buffer of at least `size` bytes from the pool selected by
// `type` (device local or host visible), and stamps it with the current frame
// slot. An idle, large enough buffer is reused when available; once the pool
// has grown past a threshold an idle but too small buffer is recreated at the
// requested size; otherwise a new buffer is added to the pool. Returns nullptr,
// after printing a warning, when no buffer could be provided.
QMetalBuffer *QMetalGraphicsPipelineData::ExtraBufferManager::acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type)
{
    QVector<QMetalBuffer *> *workBuffers = type == WorkBufType::DeviceLocal ? &deviceLocalWorkBuffers : &hostVisibleWorkBuffers;

    // Check if something is reusable as-is. A lastActiveFrameSlot of -1 is
    // treated as "idle" here (presumably reset once the frame that used the
    // buffer has completed).
    for (QMetalBuffer *workBuf : *workBuffers) {
        if (workBuf && workBuf->lastActiveFrameSlot == -1 && workBuf->size() >= size) {
            workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
            return workBuf;
        }
    }

    // Once the pool is above a certain threshold, see if there is something
    // unused (but too small) and recreate that with our size.
    if (workBuffers->count() > QMTL_FRAMES_IN_FLIGHT * 8) {
        for (QMetalBuffer *workBuf : *workBuffers) {
            if (workBuf && workBuf->lastActiveFrameSlot == -1) {
                workBuf->setSize(size);
                if (workBuf->create()) {
                    workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
                    return workBuf;
                }
            }
        }
    }

    // Add a new buffer to the pool.
    QMetalBuffer *buf;
    if (type == WorkBufType::DeviceLocal) {
        // for GPU->GPU data (non-slotted, not necessarily host writable)
        buf = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
    } else {
        // for CPU->GPU (non-slotted, host writable/coherent)
        buf = new QMetalBuffer(rhiD, QRhiBuffer::Dynamic, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
    }
    if (buf->create()) {
        buf->lastActiveFrameSlot = rhiD->currentFrameSlot;
        workBuffers->append(buf);
        return buf;
    }

    // The buffer never made it into the pool, so nothing else owns it:
    // free it here instead of leaking it.
    delete buf;

    qWarning("Failed to acquire work buffer of size %u", size);
    return nullptr;
}
5768
// Builds everything needed to render with tessellation: three compute
// pipelines running the vertex shader (one per vertex-as-compute variant), a
// compute pipeline running the tessellation control shader, and a render
// pipeline combining the tessellation evaluation and fragment shaders, plus
// the depth-stencil state. On any failure a warning is printed, tessellation
// is flagged as disabled and failed, and false is returned.
bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert, const QShader &tesc, const QShader &tese, const QShader &tessFrag)
{
    QRHI_RES_RHI(QRhiMetal);
    QString error;
    QByteArray entryPoint;
    QShaderKey activeKey;

    // Common bookkeeping for every failure exit below.
    const auto markFailed = [this]() {
        d->tess.enabled = false;
        d->tess.failed = true;
        return false;
    };

    const QShaderDescription tescDesc = tesc.description();
    const QShaderDescription teseDesc = tese.description();
    d->tess.inControlPointCount = uint(m_patchControlPointCount);
    d->tess.outControlPointCount = tescDesc.tessellationOutputVertexCount();
    // fall back to the evaluation shader when the control shader reports 0
    if (!d->tess.outControlPointCount)
        d->tess.outControlPointCount = teseDesc.tessellationOutputVertexCount();

    if (!d->tess.outControlPointCount) {
        qWarning("Failed to determine output vertex count from the tessellation control or evaluation shader, cannot tessellate");
        return markFailed();
    }

    if (m_multiViewCount >= 2)
        qWarning("Multiview is not supported with tessellation");

    // Now the vertex shader is a compute shader.
    // It should have three dedicated *VertexAsComputeShader variants.
    // What the requested variant was (Standard or Batchable) plays no role here.
    // (the Qt Quick scenegraph does not use tessellation with its materials)
    // Create all three versions.

    bool variantsPresent[3] = {};
    const QVector<QShaderKey> tessVertKeys = tessVert.availableShaders();
    for (const QShaderKey &key : tessVertKeys) {
        const QShader::Variant sourceVariant = key.sourceVariant();
        if (sourceVariant == QShader::NonIndexedVertexAsComputeShader)
            variantsPresent[0] = true;
        else if (sourceVariant == QShader::UInt32IndexedVertexAsComputeShader)
            variantsPresent[1] = true;
        else if (sourceVariant == QShader::UInt16IndexedVertexAsComputeShader)
            variantsPresent[2] = true;
    }
    const bool allVariantsPresent = variantsPresent[0] && variantsPresent[1] && variantsPresent[2];
    if (!allVariantsPresent) {
        qWarning("Vertex shader is not prepared for Metal tessellation. Cannot tessellate. "
                 "Perhaps the relevant variants (UInt32IndexedVertexAsComputeShader et al) were not generated? "
                 "Try passing --msltess to qsb.");
        return markFailed();
    }

    // Maps NonIndexed as 0, UInt32 as 1, UInt16 as 2. Do not change this ordering.
    const QShader::Variant vertexAsComputeVariants[3] = {
        QShader::NonIndexedVertexAsComputeShader,
        QShader::UInt32IndexedVertexAsComputeShader,
        QShader::UInt16IndexedVertexAsComputeShader
    };
    for (int varIndex = 0; varIndex < 3; ++varIndex) {
        const QShader::Variant variant = vertexAsComputeVariants[varIndex];

        id<MTLLibrary> lib = rhiD->d->createMetalLib(tessVert, variant, &error, &entryPoint, &activeKey);
        if (!lib) {
            qWarning("MSL shader compilation failed for vertex-as-compute shader %d: %s", int(variant), qPrintable(error));
            return markFailed();
        }
        id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint);
        if (!func) {
            qWarning("MSL function for entry point %s not found", entryPoint.constData());
            [lib release];
            return markFailed();
        }
        QMetalShader &compVs(d->tess.compVs[varIndex]);
        compVs.lib = lib;
        compVs.func = func;
        compVs.desc = tessVert.description();
        compVs.nativeResourceBindingMap = tessVert.nativeResourceBindingMap(activeKey);
        compVs.nativeShaderInfo = tessVert.nativeShaderInfo(activeKey);

        // pre-create all three MTLComputePipelineStates
        if (!d->tess.vsCompPipeline(rhiD, variant)) {
            qWarning("Failed to pre-generate compute pipeline for vertex compute shader (tessellation variant %d)", int(variant));
            return markFailed();
        }
    }

    // Pipeline #2 is a compute that runs the tessellation control (compute) shader
    id<MTLLibrary> tessControlLib = rhiD->d->createMetalLib(tesc, QShader::StandardShader, &error, &entryPoint, &activeKey);
    if (!tessControlLib) {
        qWarning("MSL shader compilation failed for tessellation control compute shader: %s", qPrintable(error));
        return markFailed();
    }
    id<MTLFunction> tessControlFunc = rhiD->d->createMSLShaderFunction(tessControlLib, entryPoint);
    if (!tessControlFunc) {
        qWarning("MSL function for entry point %s not found", entryPoint.constData());
        [tessControlLib release];
        return markFailed();
    }
    QMetalShader &compTesc(d->tess.compTesc);
    compTesc.lib = tessControlLib;
    compTesc.func = tessControlFunc;
    compTesc.desc = tesc.description();
    compTesc.nativeResourceBindingMap = tesc.nativeResourceBindingMap(activeKey);
    compTesc.nativeShaderInfo = tesc.nativeShaderInfo(activeKey);
    if (!d->tess.tescCompPipeline(rhiD)) {
        qWarning("Failed to pre-generate compute pipeline for tessellation control shader");
        return markFailed();
    }

    // Pipeline #3 is a render pipeline with the tessellation evaluation (vertex) + the fragment shader
    id<MTLLibrary> tessEvalLib = rhiD->d->createMetalLib(tese, QShader::StandardShader, &error, &entryPoint, &activeKey);
    if (!tessEvalLib) {
        qWarning("MSL shader compilation failed for tessellation evaluation vertex shader: %s", qPrintable(error));
        return markFailed();
    }
    id<MTLFunction> tessEvalFunc = rhiD->d->createMSLShaderFunction(tessEvalLib, entryPoint);
    if (!tessEvalFunc) {
        qWarning("MSL function for entry point %s not found", entryPoint.constData());
        [tessEvalLib release];
        return markFailed();
    }
    QMetalShader &vertTese(d->tess.vertTese);
    vertTese.lib = tessEvalLib;
    vertTese.func = tessEvalFunc;
    vertTese.desc = tese.description();
    vertTese.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey);
    vertTese.nativeShaderInfo = tese.nativeShaderInfo(activeKey);

    id<MTLLibrary> fragLib = rhiD->d->createMetalLib(tessFrag, QShader::StandardShader, &error, &entryPoint, &activeKey);
    if (!fragLib) {
        qWarning("MSL shader compilation failed for fragment shader: %s", qPrintable(error));
        return markFailed();
    }
    id<MTLFunction> fragFunc = rhiD->d->createMSLShaderFunction(fragLib, entryPoint);
    if (!fragFunc) {
        qWarning("MSL function for entry point %s not found", entryPoint.constData());
        [fragLib release];
        return markFailed();
    }
    d->fs.lib = fragLib;
    d->fs.func = fragFunc;
    d->fs.desc = tessFrag.description();
    d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey);
    d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey);

    if (!d->tess.teseFragRenderPipeline(rhiD, this)) {
        qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader");
        return markFailed();
    }

    MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
    setupMetalDepthStencilDescriptor(dsDesc);
    d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
    [dsDesc release];

    // no primitiveType
    mapStates();

    return true;
}
5952
// (Re)creates the native pipeline objects. Any previous state is destroyed
// first. Depending on the shader stages and topology either the tessellation
// pipelines or the plain vertex + fragment pipeline is built, followed by the
// buffer that holds storage buffer sizes for shaders that request one.
// Returns false when the sanity check, the pipeline creation or the creation
// of the buffer size buffer fails.
bool QMetalGraphicsPipeline::create()
{
    destroy(); // no early test, always invoke and leave it to destroy to decide what to clean up

    QRHI_RES_RHI(QRhiMetal);
    rhiD->pipelineCreationStart();
    if (!rhiD->sanityCheckGraphicsPipeline(this))
        return false;

    // See if tessellation is involved. Things will be very different, if so.
    QShader tessVert;
    QShader tesc;
    QShader tese;
    QShader tessFrag;
    for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
        switch (shaderStage.type()) {
        case QRhiShaderStage::Vertex:
            tessVert = shaderStage.shader();
            break;
        case QRhiShaderStage::TessellationControl:
            tesc = shaderStage.shader();
            break;
        case QRhiShaderStage::TessellationEvaluation:
            tese = shaderStage.shader();
            break;
        case QRhiShaderStage::Fragment:
            tessFrag = shaderStage.shader();
            break;
        default:
            break;
        }
    }
    d->tess.enabled = tesc.isValid() && tese.isValid() && m_topology == Patches && m_patchControlPointCount > 0;
    d->tess.failed = false;

    bool ok = d->tess.enabled ? createTessellationPipelines(tessVert, tesc, tese, tessFrag) : createVertexFragmentPipeline();
    if (!ok)
        return false;

    // SPIRV-Cross buffer size buffers
    int buffers = 0;
    QVarLengthArray<QMetalShader *, 6> shaders;
    if (d->tess.enabled) {
        shaders.append(&d->tess.compVs[0]);
        shaders.append(&d->tess.compVs[1]);
        shaders.append(&d->tess.compVs[2]);
        shaders.append(&d->tess.compTesc);
        shaders.append(&d->tess.vertTese);
    } else {
        shaders.append(&d->vs);
    }
    shaders.append(&d->fs);

    for (QMetalShader *shader : shaders) {
        if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
            const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
            shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1);
            int maxNativeBinding = 0;
            for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks())
                maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first);

            // we use one buffer to hold data for all graphics shader stages, each with a different offset.
            // buffer offsets must be 32byte aligned - adjust buffer count accordingly
            buffers += ((maxNativeBinding + 1 + 7) / 8) * 8;
        }
    }

    if (buffers) {
        if (!d->bufferSizeBuffer)
            d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));

        d->bufferSizeBuffer->setSize(buffers * sizeof(int));
        // Do not report success with an unusable buffer size buffer: the
        // shaders that requested it depend on it.
        if (!d->bufferSizeBuffer->create()) {
            qWarning("Failed to create buffer size buffer for graphics pipeline");
            return false;
        }
    }

    rhiD->pipelineCreationEnd();
    lastActiveFrameSlot = -1;
    generation += 1;
    rhiD->registerResource(this);
    return true;
}
6034
// Constructs the compute pipeline wrapper and allocates its backend-private
// data, which the destructor deletes.
QMetalComputePipeline::QMetalComputePipeline(QRhiImplementation *rhi)
    : QRhiComputePipeline(rhi), d(new QMetalComputePipelineData)
{
}
6040
// Releases the native resources via destroy() before freeing the
// backend-private data.
QMetalComputePipeline::~QMetalComputePipeline()
{
    destroy();

    delete d;
}
6046
// Tears down the compute shader data and, when a pipeline state exists, frees
// the buffer size buffer and hands the pipeline state over to the release
// queue of the QRhi (tagged with the last active frame slot) instead of
// releasing it immediately.
void QMetalComputePipeline::destroy()
{
    d->cs.destroy();

    if (d->ps) {
        delete d->bufferSizeBuffer;
        d->bufferSizeBuffer = nullptr;

        QRhiMetalData::DeferredReleaseEntry entry;
        entry.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline;
        entry.lastActiveFrameSlot = lastActiveFrameSlot;
        entry.computePipeline.pipelineState = d->ps;
        d->ps = nil;

        QRHI_RES_RHI(QRhiMetal);
        if (rhiD) {
            rhiD->d->releaseQueue.append(entry);
            rhiD->unregisterResource(this);
        }
    }
}
6069
// Attaches the binary archive, when there is one, to the compute pipeline
// descriptor. Does nothing without a binary archive.
void QRhiMetalData::trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
{
    if (!binArch)
        return;

    cpDesc.binaryArchives = [NSArray arrayWithObjects: binArch, nil];
}
6077
6078void QRhiMetalData::addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
6079{
6080 if (binArch) {
6081 NSError *err = nil;
6082 if (![binArch addComputePipelineFunctionsWithDescriptor: cpDesc error: &err]) {
6083 const QString msg = QString::fromNSString(err.localizedDescription);
6084 qWarning("Failed to collect compute pipeline functions to binary archive: %s", qPrintable(msg));
6085 }
6086 }
6087}
6088
6089bool QMetalComputePipeline::create()
6090{
6091 if (d->ps)
6092 destroy();
6093
6094 QRHI_RES_RHI(QRhiMetal);
6095 rhiD->pipelineCreationStart();
6096
6097 auto cacheIt = rhiD->d->shaderCache.constFind(m_shaderStage);
6098 if (cacheIt != rhiD->d->shaderCache.constEnd()) {
6099 d->cs = *cacheIt;
6100 } else {
6101 const QShader shader = m_shaderStage.shader();
6102 QString error;
6103 QByteArray entryPoint;
6104 QShaderKey activeKey;
6105 id<MTLLibrary> lib = rhiD->d->createMetalLib(shader, m_shaderStage.shaderVariant(),
6106 &error, &entryPoint, &activeKey);
6107 if (!lib) {
6108 qWarning("MSL shader compilation failed: %s", qPrintable(error));
6109 return false;
6110 }
6111 id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint);
6112 if (!func) {
6113 qWarning("MSL function for entry point %s not found", entryPoint.constData());
6114 [lib release];
6115 return false;
6116 }
6117 d->cs.lib = lib;
6118 d->cs.func = func;
6119 d->cs.localSize = shader.description().computeShaderLocalSize();
6120 d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
6121 d->cs.desc = shader.description();
6122 d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
6123
6124 // SPIRV-Cross buffer size buffers
6125 if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
6126 const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
6127 d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1);
6128 }
6129
6130 if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) {
6131 for (QMetalShader &s : rhiD->d->shaderCache)
6132 s.destroy();
6133 rhiD->d->shaderCache.clear();
6134 }
6135 rhiD->d->shaderCache.insert(m_shaderStage, d->cs);
6136 }
6137
6138 [d->cs.lib retain];
6139 [d->cs.func retain];
6140
6141 d->localSize = MTLSizeMake(d->cs.localSize[0], d->cs.localSize[1], d->cs.localSize[2]);
6142
6143 MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
6144 cpDesc.computeFunction = d->cs.func;
6145
6146 rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
6147
6148 if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
6149 rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
6150
6151 NSError *err = nil;
6152 d->ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
6153 options: MTLPipelineOptionNone
6154 reflection: nil
6155 error: &err];
6156 [cpDesc release];
6157 if (!d->ps) {
6158 const QString msg = QString::fromNSString(err.localizedDescription);
6159 qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
6160 return false;
6161 }
6162
6163 // SPIRV-Cross buffer size buffers
6164 if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
6165 int buffers = 0;
6166 for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks())
6167 buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first);
6168
6169 buffers += 1;
6170
6171 if (!d->bufferSizeBuffer)
6172 d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
6173
6174 d->bufferSizeBuffer->setSize(buffers * sizeof(int));
6175 d->bufferSizeBuffer->create();
6176 }
6177
6178 rhiD->pipelineCreationEnd();
6179 lastActiveFrameSlot = -1;
6180 generation += 1;
6181 rhiD->registerResource(this);
6182 return true;
6183}
6184
6185QMetalCommandBuffer::QMetalCommandBuffer(QRhiImplementation *rhi)
6186 : QRhiCommandBuffer(rhi),
6187 d(new QMetalCommandBufferData)
6188{
6189 resetState();
6190}
6191
6192QMetalCommandBuffer::~QMetalCommandBuffer()
6193{
6194 destroy();
6195 delete d;
6196}
6197
6198void QMetalCommandBuffer::destroy()
6199{
6200 // nothing to do here, we do not own the MTL cb object
6201}
6202
6203const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles()
6204{
6205 nativeHandlesStruct.commandBuffer = (MTLCommandBuffer *) d->cb;
6206 nativeHandlesStruct.encoder = (MTLRenderCommandEncoder *) d->currentRenderPassEncoder;
6207 return &nativeHandlesStruct;
6208}
6209
6210void QMetalCommandBuffer::resetState(double lastGpuTime)
6211{
6212 d->lastGpuTime = lastGpuTime;
6213 d->currentRenderPassEncoder = nil;
6214 d->currentComputePassEncoder = nil;
6215 d->tessellationComputeEncoder = nil;
6216 d->currentPassRpDesc = nil;
6217 resetPerPassState();
6218}
6219
6220void QMetalCommandBuffer::resetPerPassState()
6221{
6222 recordingPass = NoPass;
6223 currentTarget = nullptr;
6224 resetPerPassCachedState();
6225}
6226
6227void QMetalCommandBuffer::resetPerPassCachedState()
6228{
6229 currentGraphicsPipeline = nullptr;
6230 currentComputePipeline = nullptr;
6231 currentPipelineGeneration = 0;
6232 currentGraphicsSrb = nullptr;
6233 currentComputeSrb = nullptr;
6234 currentSrbGeneration = 0;
6235 currentResSlot = -1;
6236 currentIndexBuffer = nullptr;
6237 currentIndexOffset = 0;
6238 currentIndexFormat = QRhiCommandBuffer::IndexUInt16;
6239 currentCullMode = -1;
6240 currentTriangleFillMode = -1;
6241 currentFrontFaceWinding = -1;
6242 currentDepthBiasValues = { 0.0f, 0.0f };
6243
6244 d->currentShaderResourceBindingState = {};
6245 d->currentDepthStencilState = nil;
6246 d->currentFirstVertexBinding = -1;
6247 d->currentVertexInputsBuffers.clear();
6248 d->currentVertexInputOffsets.clear();
6249}
6250
6251QMetalSwapChain::QMetalSwapChain(QRhiImplementation *rhi)
6252 : QRhiSwapChain(rhi),
6253 rtWrapper(rhi, this),
6254 cbWrapper(rhi),
6255 d(new QMetalSwapChainData)
6256{
6257 for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
6258 d->sem[i] = nullptr;
6259 d->msaaTex[i] = nil;
6260 }
6261}
6262
6263QMetalSwapChain::~QMetalSwapChain()
6264{
6265 destroy();
6266 delete d;
6267}
6268
6269void QMetalSwapChain::destroy()
6270{
6271 if (!d->layer)
6272 return;
6273
6274 for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
6275 if (d->sem[i]) {
6276 // the semaphores cannot be released if they do not have the initial value
6277 waitUntilCompleted(i);
6278
6279 dispatch_release(d->sem[i]);
6280 d->sem[i] = nullptr;
6281 }
6282 }
6283
6284 for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
6285 [d->msaaTex[i] release];
6286 d->msaaTex[i] = nil;
6287 }
6288
6289 d->layer = nullptr;
6290 m_proxyData = {};
6291
6292 [d->curDrawable release];
6293 d->curDrawable = nil;
6294
6295 QRHI_RES_RHI(QRhiMetal);
6296 if (rhiD) {
6297 rhiD->swapchains.remove(this);
6298 rhiD->unregisterResource(this);
6299 }
6300}
6301
6302QRhiCommandBuffer *QMetalSwapChain::currentFrameCommandBuffer()
6303{
6304 return &cbWrapper;
6305}
6306
6307QRhiRenderTarget *QMetalSwapChain::currentFrameRenderTarget()
6308{
6309 return &rtWrapper;
6310}
6311
6312// view.layer should ideally be called on the main thread, otherwise the UI
6313// Thread Checker in Xcode drops a warning. Hence trying to proxy it through
6314// QRhiSwapChainProxyData instead of just calling this function directly.
6315static inline CAMetalLayer *layerForWindow(QWindow *window)
6316{
6317 Q_ASSERT(window);
6318#ifdef Q_OS_MACOS
6319 NSView *view = reinterpret_cast<NSView *>(window->winId());
6320#else
6321 UIView *view = reinterpret_cast<UIView *>(window->winId());
6322#endif
6323 Q_ASSERT(view);
6324 return static_cast<CAMetalLayer *>(view.layer);
6325}
6326
6327// If someone calls this, it is hopefully from the main thread, and they will
6328// then set the returned data on the QRhiSwapChain, so it won't need to query
6329// the layer on its own later on.
6330QRhiSwapChainProxyData QRhiMetal::updateSwapChainProxyData(QWindow *window)
6331{
6332 QRhiSwapChainProxyData d;
6333 d.reserved[0] = layerForWindow(window);
6334 return d;
6335}
6336
6337QSize QMetalSwapChain::surfacePixelSize()
6338{
6339 Q_ASSERT(m_window);
6340 CAMetalLayer *layer = d->layer;
6341 if (!layer)
6342 layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, m_window, QRhi::Metal, 0);
6343
6344 Q_ASSERT(layer);
6345 int height = (int)layer.bounds.size.height;
6346 int width = (int)layer.bounds.size.width;
6347 width *= layer.contentsScale;
6348 height *= layer.contentsScale;
6349 return QSize(width, height);
6350}
6351
6352bool QMetalSwapChain::isFormatSupported(Format f)
6353{
6354 if (f == HDRExtendedSrgbLinear) {
6355 if (@available(iOS 16.0, *))
6356 return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
6357 else
6358 return false;
6359 } else if (f == HDR10) {
6360 if (@available(iOS 16.0, *))
6361 return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
6362 else
6363 return false;
6364 } else if (f == HDRExtendedDisplayP3Linear) {
6365 return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
6366 }
6367 return f == SDR;
6368}
6369
6370QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
6371{
6372 QRHI_RES_RHI(QRhiMetal);
6373
6374 chooseFormats(); // ensure colorFormat and similar are filled out
6375
6376 QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
6377 rpD->colorAttachmentCount = 1;
6378 rpD->hasDepthStencil = m_depthStencil != nullptr;
6379
6380 rpD->colorFormat[0] = int(d->colorFormat);
6381
6382#ifdef Q_OS_MACOS
6383 // m_depthStencil may not be built yet so cannot rely on computed fields in it
6384 rpD->dsFormat = rhiD->d->dev.depth24Stencil8PixelFormatSupported
6385 ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8;
6386#else
6387 rpD->dsFormat = MTLPixelFormatDepth32Float_Stencil8;
6388#endif
6389
6390 rpD->hasShadingRateMap = m_shadingRateMap != nullptr;
6391
6392 rpD->updateSerializedFormat();
6393
6394 rhiD->registerResource(rpD, false);
6395 return rpD;
6396}
6397
6398void QMetalSwapChain::chooseFormats()
6399{
6400 QRHI_RES_RHI(QRhiMetal);
6401 samples = rhiD->effectiveSampleCount(m_sampleCount);
6402 // pick a format that is allowed for CAMetalLayer.pixelFormat
6403 if (m_format == HDRExtendedSrgbLinear || m_format == HDRExtendedDisplayP3Linear) {
6404 d->colorFormat = MTLPixelFormatRGBA16Float;
6405 d->rhiColorFormat = QRhiTexture::RGBA16F;
6406 return;
6407 }
6408 if (m_format == HDR10) {
6409 d->colorFormat = MTLPixelFormatRGB10A2Unorm;
6410 d->rhiColorFormat = QRhiTexture::RGB10A2;
6411 return;
6412 }
6413 d->colorFormat = m_flags.testFlag(sRGB) ? MTLPixelFormatBGRA8Unorm_sRGB : MTLPixelFormatBGRA8Unorm;
6414 d->rhiColorFormat = QRhiTexture::BGRA8;
6415}
6416
6417void QMetalSwapChain::waitUntilCompleted(int slot)
6418{
6419 // wait+signal is the general pattern to ensure the commands for a
6420 // given frame slot have completed (if sem is 1, we go 0 then 1; if
6421 // sem is 0 we go -1, block, completion increments to 0, then us to 1)
6422
6423 dispatch_semaphore_t sem = d->sem[slot];
6424 dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
6425 dispatch_semaphore_signal(sem);
6426}
6427
6428bool QMetalSwapChain::createOrResize()
6429{
6430 Q_ASSERT(m_window);
6431
6432 const bool needsRegistration = !window || window != m_window;
6433
6434 if (window && window != m_window)
6435 destroy();
6436 // else no destroy(), this is intentional
6437
6438 QRHI_RES_RHI(QRhiMetal);
6439 if (needsRegistration || !rhiD->swapchains.contains(this))
6440 rhiD->swapchains.insert(this);
6441
6442 window = m_window;
6443
6444 if (window->surfaceType() != QSurface::MetalSurface) {
6445 qWarning("QMetalSwapChain only supports MetalSurface windows");
6446 return false;
6447 }
6448
6449 d->layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, window, QRhi::Metal, 0);
6450 Q_ASSERT(d->layer);
6451
6452 chooseFormats();
6453 if (d->colorFormat != d->layer.pixelFormat)
6454 d->layer.pixelFormat = d->colorFormat;
6455
6456 if (m_format == HDRExtendedSrgbLinear) {
6457 if (@available(iOS 16.0, *)) {
6458 d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
6459 d->layer.wantsExtendedDynamicRangeContent = YES;
6460 }
6461 } else if (m_format == HDR10) {
6462 if (@available(iOS 16.0, *)) {
6463 d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2100_PQ);
6464 d->layer.wantsExtendedDynamicRangeContent = YES;
6465 }
6466 } else if (m_format == HDRExtendedDisplayP3Linear) {
6467 if (@available(iOS 16.0, *)) {
6468 d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3);
6469 d->layer.wantsExtendedDynamicRangeContent = YES;
6470 }
6471 }
6472
6473 if (m_flags.testFlag(UsedAsTransferSource))
6474 d->layer.framebufferOnly = NO;
6475
6476#ifdef Q_OS_MACOS
6477 if (m_flags.testFlag(NoVSync))
6478 d->layer.displaySyncEnabled = NO;
6479#endif
6480
6481 if (m_flags.testFlag(SurfaceHasPreMulAlpha)) {
6482 d->layer.opaque = NO;
6483 } else if (m_flags.testFlag(SurfaceHasNonPreMulAlpha)) {
6484 // The CoreAnimation compositor is said to expect premultiplied alpha,
6485 // so this is then wrong when it comes to the blending operations but
6486 // there's nothing we can do. Fortunately Qt Quick always outputs
6487 // premultiplied alpha so it is not a problem there.
6488 d->layer.opaque = NO;
6489 } else {
6490 d->layer.opaque = YES;
6491 }
6492
6493 // Now set the layer's drawableSize which will stay set to the same value
6494 // until the next createOrResize(), thus ensuring atomicity with regards to
6495 // the drawable size in frames.
6496 int width = (int)d->layer.bounds.size.width;
6497 int height = (int)d->layer.bounds.size.height;
6498 CGSize layerSize = CGSizeMake(width, height);
6499 const float scaleFactor = d->layer.contentsScale;
6500 layerSize.width *= scaleFactor;
6501 layerSize.height *= scaleFactor;
6502 d->layer.drawableSize = layerSize;
6503
6504 m_currentPixelSize = QSizeF::fromCGSize(layerSize).toSize();
6505 pixelSize = m_currentPixelSize;
6506
6507 [d->layer setDevice: rhiD->d->dev];
6508
6509 [d->curDrawable release];
6510 d->curDrawable = nil;
6511
6512 for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
6513 d->lastGpuTime[i] = 0;
6514 if (!d->sem[i])
6515 d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1);
6516 }
6517
6518 currentFrameSlot = 0;
6519 frameCount = 0;
6520
6521 ds = m_depthStencil ? QRHI_RES(QMetalRenderBuffer, m_depthStencil) : nullptr;
6522 if (m_depthStencil && m_depthStencil->sampleCount() != m_sampleCount) {
6523 qWarning("Depth-stencil buffer's sampleCount (%d) does not match color buffers' sample count (%d). Expect problems.",
6524 m_depthStencil->sampleCount(), m_sampleCount);
6525 }
6526 if (m_depthStencil && m_depthStencil->pixelSize() != pixelSize) {
6527 if (m_depthStencil->flags().testFlag(QRhiRenderBuffer::UsedWithSwapChainOnly)) {
6528 m_depthStencil->setPixelSize(pixelSize);
6529 if (!m_depthStencil->create())
6530 qWarning("Failed to rebuild swapchain's associated depth-stencil buffer for size %dx%d",
6531 pixelSize.width(), pixelSize.height());
6532 } else {
6533 qWarning("Depth-stencil buffer's size (%dx%d) does not match the layer size (%dx%d). Expect problems.",
6534 m_depthStencil->pixelSize().width(), m_depthStencil->pixelSize().height(),
6535 pixelSize.width(), pixelSize.height());
6536 }
6537 }
6538
6539 rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget
6540 rtWrapper.d->pixelSize = pixelSize;
6541 rtWrapper.d->dpr = scaleFactor;
6542 rtWrapper.d->sampleCount = samples;
6543 rtWrapper.d->colorAttCount = 1;
6544 rtWrapper.d->dsAttCount = ds ? 1 : 0;
6545
6546 qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, pixel size %dx%d (scale %.2f)",
6547 pixelSize.width(), pixelSize.height(), scaleFactor);
6548
6549 if (samples > 1) {
6550 MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
6551 desc.textureType = MTLTextureType2DMultisample;
6552 desc.pixelFormat = d->colorFormat;
6553 desc.width = NSUInteger(pixelSize.width());
6554 desc.height = NSUInteger(pixelSize.height());
6555 desc.sampleCount = NSUInteger(samples);
6556 desc.resourceOptions = MTLResourceStorageModePrivate;
6557 desc.storageMode = MTLStorageModePrivate;
6558 desc.usage = MTLTextureUsageRenderTarget;
6559 for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
6560 [d->msaaTex[i] release];
6561 d->msaaTex[i] = [rhiD->d->dev newTextureWithDescriptor: desc];
6562 }
6563 [desc release];
6564 }
6565
6566 rhiD->registerResource(this);
6567
6568 return true;
6569}
6570
6571QRhiSwapChainHdrInfo QMetalSwapChain::hdrInfo()
6572{
6573 QRhiSwapChainHdrInfo info;
6574 info.limitsType = QRhiSwapChainHdrInfo::ColorComponentValue;
6575 info.limits.colorComponentValue.maxColorComponentValue = 1;
6576 info.limits.colorComponentValue.maxPotentialColorComponentValue = 1;
6577 info.luminanceBehavior = QRhiSwapChainHdrInfo::DisplayReferred; // 1.0 = SDR white
6578 info.sdrWhiteLevel = 200; // typical value, but dummy (don't know the real one); won't matter due to being display-referred
6579
6580 if (m_window) {
6581 // Must use m_window, not window, given this may be called before createOrResize().
6582#if defined(Q_OS_MACOS)
6583 NSView *view = reinterpret_cast<NSView *>(m_window->winId());
6584 NSScreen *screen = view.window.screen;
6585 info.limits.colorComponentValue.maxColorComponentValue = screen.maximumExtendedDynamicRangeColorComponentValue;
6586 info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.maximumPotentialExtendedDynamicRangeColorComponentValue;
6587#elif defined(Q_OS_IOS)
6588 if (@available(iOS 16.0, *)) {
6589 UIView *view = reinterpret_cast<UIView *>(m_window->winId());
6590 UIScreen *screen = view.window.windowScene.screen;
6591 info.limits.colorComponentValue.maxColorComponentValue = view.window.windowScene.screen.currentEDRHeadroom;
6592 info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.potentialEDRHeadroom;
6593 }
6594#endif
6595 }
6596
6597 return info;
6598}
6599
6600QT_END_NAMESPACE
QRhiMetalData * d
QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice=nullptr)
Definition qrhimetal.mm:481
bool create(QRhi::Flags flags) override
Definition qrhimetal.mm:555
static const int SUPPORTED_STAGES
bool importedDevice
bool importedCmdQueue
Combined button and popup list for selecting options.
#define __has_feature(x)
#define QRHI_RES_RHI(t)
Definition qrhi_p.h:29
#define QRHI_RES(t, x)
Definition qrhi_p.h:28
Int aligned(Int v, Int byteAlign)
\variable QRhiVulkanQueueSubmitParams::waitSemaphoreCount
Q_DECLARE_TYPEINFO(QRhiMetalData::TextureReadback, Q_RELOCATABLE_TYPE)
#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
Definition qrhimetal.mm:56
Q_DECLARE_TYPEINFO(QRhiMetalData::DeferredReleaseEntry, Q_RELOCATABLE_TYPE)
#define QRHI_METAL_DISABLE_BINARY_ARCHIVE
Definition qrhimetal.mm:51
static QT_BEGIN_NAMESPACE const int QMTL_FRAMES_IN_FLIGHT
Definition qrhimetal_p.h:23
QVarLengthArray< BufferUpdate, 16 > pendingUpdates[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:289
id< MTLBuffer > buf[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:284
MTLRenderPassDescriptor * currentPassRpDesc
Definition qrhimetal.mm:355
id< MTLDepthStencilState > currentDepthStencilState
Definition qrhimetal.mm:359
QMetalShaderResourceBindingsData currentShaderResourceBindingState
Definition qrhimetal.mm:360
id< MTLComputeCommandEncoder > tessellationComputeEncoder
Definition qrhimetal.mm:354
QRhiBatchedBindings< id< MTLBuffer > > currentVertexInputsBuffers
Definition qrhimetal.mm:357
id< MTLRenderCommandEncoder > currentRenderPassEncoder
Definition qrhimetal.mm:352
id< MTLCommandBuffer > cb
Definition qrhimetal.mm:350
QRhiBatchedBindings< NSUInteger > currentVertexInputOffsets
Definition qrhimetal.mm:358
id< MTLComputeCommandEncoder > currentComputePassEncoder
Definition qrhimetal.mm:353
id< MTLComputePipelineState > ps
Definition qrhimetal.mm:461
QMetalBuffer * bufferSizeBuffer
Definition qrhimetal.mm:466
QVector< QMetalBuffer * > deviceLocalWorkBuffers
Definition qrhimetal.mm:415
QMetalBuffer * acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type=WorkBufType::DeviceLocal)
QVector< QMetalBuffer * > hostVisibleWorkBuffers
Definition qrhimetal.mm:416
quint32 tescCompOutputBufferSize(quint32 patchCount) const
Definition qrhimetal.mm:434
std::array< id< MTLComputePipelineState >, 3 > vertexComputeState
Definition qrhimetal.mm:425
quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const
Definition qrhimetal.mm:438
static int vsCompVariantToIndex(QShader::Variant vertexCompVariant)
id< MTLComputePipelineState > tescCompPipeline(QRhiMetal *rhiD)
id< MTLRenderPipelineState > teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline)
QMetalGraphicsPipelineData * q
Definition qrhimetal.mm:419
id< MTLComputePipelineState > vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant)
quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const
Definition qrhimetal.mm:443
quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const
Definition qrhimetal.mm:429
id< MTLComputePipelineState > tessControlComputeState
Definition qrhimetal.mm:426
QMetalGraphicsPipeline * q
Definition qrhimetal.mm:398
MTLPrimitiveType primitiveType
Definition qrhimetal.mm:401
id< MTLRenderPipelineState > ps
Definition qrhimetal.mm:399
QMetalBuffer * bufferSizeBuffer
Definition qrhimetal.mm:456
void setupVertexInputDescriptor(MTLVertexDescriptor *desc)
void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc)
id< MTLDepthStencilState > ds
Definition qrhimetal.mm:400
MTLTriangleFillMode triangleFillMode
Definition qrhimetal.mm:404
id< MTLTexture > tex
Definition qrhimetal.mm:295
MTLPixelFormat format
Definition qrhimetal.mm:294
static const int MAX_COLOR_ATTACHMENTS
ColorAtt colorAtt[QMetalRenderPassDescriptor::MAX_COLOR_ATTACHMENTS]
Definition qrhimetal.mm:384
id< MTLTexture > dsResolveTex
Definition qrhimetal.mm:386
QRhiRenderTargetAttachmentTracker::ResIdList currentResIdList
Definition qrhimetal.mm:393
id< MTLTexture > dsTex
Definition qrhimetal.mm:385
id< MTLSamplerState > samplerState
Definition qrhimetal.mm:314
QVarLengthArray< Buffer, 8 > buffers
Definition qrhimetal.mm:337
QVarLengthArray< Sampler, 8 > samplers
Definition qrhimetal.mm:339
QRhiBatchedBindings< NSUInteger > bufferOffsetBatches
Definition qrhimetal.mm:341
QVarLengthArray< Texture, 8 > textures
Definition qrhimetal.mm:338
QRhiBatchedBindings< id< MTLSamplerState > > samplerBatches
Definition qrhimetal.mm:343
QRhiBatchedBindings< id< MTLTexture > > textureBatches
Definition qrhimetal.mm:342
QRhiBatchedBindings< id< MTLBuffer > > bufferBatches
Definition qrhimetal.mm:340
\variable QRhiMetalCommandBufferNativeHandles::commandBuffer
Definition qrhimetal.mm:151
void destroy()
Definition qrhimetal.mm:160
id< MTLLibrary > lib
Definition qrhimetal.mm:152
uint outputVertexCount
Definition qrhimetal.mm:155
std::array< uint, 3 > localSize
Definition qrhimetal.mm:154
QShaderDescription desc
Definition qrhimetal.mm:156
id< MTLFunction > func
Definition qrhimetal.mm:153
id< MTLRasterizationRateMap > rateMap
Definition qrhimetal.mm:319
id< CAMetalDrawable > curDrawable
Definition qrhimetal.mm:472
dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:473
MTLPixelFormat colorFormat
Definition qrhimetal.mm:478
MTLRenderPassDescriptor * rp
Definition qrhimetal.mm:475
CAMetalLayer * layer
Definition qrhimetal.mm:471
double lastGpuTime[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:474
id< MTLTexture > msaaTex[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:476
QRhiTexture::Format rhiColorFormat
Definition qrhimetal.mm:477
id< MTLTexture > tex
Definition qrhimetal.mm:304
id< MTLTexture > viewForLevel(int level)
QMetalTexture * q
Definition qrhimetal.mm:302
id< MTLTexture > perLevelViews[QRhi::MAX_MIP_LEVELS]
Definition qrhimetal.mm:307
id< MTLBuffer > stagingBuf[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:305
QMetalTextureData(QMetalTexture *t)
Definition qrhimetal.mm:300
MTLPixelFormat format
Definition qrhimetal.mm:303
QRhiReadbackResult * result
Definition qrhimetal.mm:261
id< MTLComputePipelineState > pipelineState
Definition qrhimetal.mm:231
id< MTLDepthStencilState > depthStencilState
Definition qrhimetal.mm:226
std::array< id< MTLComputePipelineState >, 3 > tessVertexComputeState
Definition qrhimetal.mm:227
id< MTLRasterizationRateMap > rateMap
Definition qrhimetal.mm:234
id< MTLSamplerState > samplerState
Definition qrhimetal.mm:219
id< MTLBuffer > stagingBuffers[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:215
id< MTLComputePipelineState > tessTessControlComputeState
Definition qrhimetal.mm:228
id< MTLRenderPipelineState > pipelineState
Definition qrhimetal.mm:225
id< MTLBuffer > buffers[QMTL_FRAMES_IN_FLIGHT]
Definition qrhimetal.mm:208
id< MTLTexture > views[QRhi::MAX_MIP_LEVELS]
Definition qrhimetal.mm:216
QMetalCommandBuffer cbWrapper
Definition qrhimetal.mm:244
OffscreenFrame(QRhiImplementation *rhi)
Definition qrhimetal.mm:241
QRhiReadbackDescription desc
Definition qrhimetal.mm:249
QRhiReadbackResult * result
Definition qrhimetal.mm:250
QRhiTexture::Format format
Definition qrhimetal.mm:254
void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
QRhiMetalData(QRhiMetal *rhi)
Definition qrhimetal.mm:171
QVarLengthArray< BufferReadback, 2 > activeBufferReadbacks
Definition qrhimetal.mm:267
QHash< QRhiShaderStage, QMetalShader > shaderCache
Definition qrhimetal.mm:274
bool setupBinaryArchive(NSURL *sourceFileUrl=nil)
Definition qrhimetal.mm:535
void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
MTLCaptureManager * captureMgr
Definition qrhimetal.mm:269
void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
id< MTLLibrary > createMetalLib(const QShader &shader, QShader::Variant shaderVariant, QString *error, QByteArray *entryPoint, QShaderKey *activeKey)
QVector< DeferredReleaseEntry > releaseQueue
Definition qrhimetal.mm:238
id< MTLFunction > createMSLShaderFunction(id< MTLLibrary > lib, const QByteArray &entryPoint)
id< MTLCaptureScope > captureScope
Definition qrhimetal.mm:270
MTLRenderPassDescriptor * createDefaultRenderPass(bool hasDepthStencil, const QColor &colorClearValue, const QRhiDepthStencilClearValue &depthStencilClearValue, int colorAttCount, QRhiShadingRateMap *shadingRateMap)
QRhiMetal * q
Definition qrhimetal.mm:173
static const int TEXBUF_ALIGN
Definition qrhimetal.mm:272
API_AVAILABLE(macosx(11.0), ios(14.0)) id< MTLBinaryArchive > binArch
id< MTLCommandBuffer > newCommandBuffer()
Definition qrhimetal.mm:523
QVarLengthArray< TextureReadback, 2 > activeTextureReadbacks
Definition qrhimetal.mm:256
id< MTLDevice > dev
Definition qrhimetal.mm:174
void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
id< MTLCommandQueue > cmdQueue
Definition qrhimetal.mm:175