From 038e35c8d5266cb748d704566fd8677381f006e4 Mon Sep 17 00:00:00 2001 From: qiaojbao Date: Thu, 13 Mar 2025 14:55:55 +0800 Subject: [PATCH] Update llpc from commit f1bdd306 Add Navi48 support --- .typos.toml | 2 + cmake/vkgc.cmake | 8 +- compilerutils/CMakeLists.txt | 2 +- compilerutils/plugin/CMakeLists.txt | 2 +- compilerutils/test/CMakeLists.txt | 2 +- gfxruntime/src/shaders/AdvancedBlend.hlsl | 2 +- include/vkgcDefs.h | 85 +- lgc/CMakeLists.txt | 12 + lgc/builder/BuilderImpl.cpp | 56 ++ lgc/builder/DescBuilder.cpp | 11 +- lgc/builder/ImageBuilder.cpp | 60 ++ lgc/builder/InOutBuilder.cpp | 7 + lgc/builder/MiscBuilder.cpp | 9 +- lgc/builder/SubgroupBuilder.cpp | 70 ++ lgc/builder/YCbCrAddressHandler.cpp | 8 +- lgc/builder/YCbCrConverter.cpp | 14 +- lgc/include/lgc/builder/BuilderImpl.h | 20 + .../lgc/lowering/LowerBufferOperations.h | 4 + .../lgc/lowering/LowerCooperativeMatrix.h | 23 + lgc/include/lgc/lowering/MutateEntryPoint.h | 9 + lgc/include/lgc/lowering/ShaderInputs.h | 3 + lgc/include/lgc/state/Abi.h | 14 +- lgc/include/lgc/state/AbiMetadata.h | 11 + lgc/include/lgc/state/Defs.h | 10 +- lgc/include/lgc/state/IntrinsDefs.h | 31 + lgc/include/lgc/state/PipelineState.h | 7 +- lgc/include/lgc/state/TargetInfo.h | 13 +- lgc/interface/lgc/Pipeline.h | 76 ++ lgc/interface/lgc/RayTracingLibrarySummary.h | 7 +- lgc/interface/lgc/RegStackUsage.h | 7 +- lgc/lowering/AddBufferOperationMetadata.cpp | 180 ++++ lgc/lowering/InitializeWorkgroupMemory.cpp | 6 + lgc/lowering/LgcLowering.cpp | 8 + lgc/lowering/LowerBufferOperations.cpp | 61 ++ lgc/lowering/LowerCooperativeMatrix.cpp | 792 +++++++++++++++++- lgc/lowering/LowerGpuRt.cpp | 9 + lgc/lowering/LowerInOut.cpp | 53 +- lgc/lowering/MeshTaskShader.cpp | 58 ++ lgc/lowering/MutateEntryPoint.cpp | 153 ++++ lgc/lowering/NggPrimShader.cpp | 652 ++++++++++++++ lgc/lowering/NggPrimShader.h | 15 + lgc/lowering/PassRegistry.inc | 3 + lgc/lowering/PreparePipelineAbi.cpp | 10 + lgc/lowering/RegisterMetadataBuilder.cpp | 44 + 
lgc/lowering/SetupTargetFeatures.cpp | 20 + lgc/lowering/ShaderInputs.cpp | 28 + lgc/lowering/ShaderMerger.cpp | 20 + lgc/lowering/VertexFetch.cpp | 9 +- lgc/state/PipelineState.cpp | 28 + lgc/state/RayTracingLibrarySummary.cpp | 14 +- lgc/state/TargetInfo.cpp | 25 + .../continuation-basic.lgc | 164 ++++ .../CpsLoweringWithDvgpr/cps-entry-point.lgc | 150 ++++ .../cps-stack-lowering.lgc | 708 ++++++++++++++++ .../CpsLoweringWithDvgpr/cps-unify-exits.lgc | 388 +++++++++ .../CpsLoweringWithDvgpr/lit.local.cfg | 27 + lgc/test/shaderdb/gfx12/CsBPermuteWave64.lgc | 48 ++ .../gfx12/CsClusteredMultiExclusive.lgc | 199 +++++ .../gfx12/buffer.atomic.ops.scope.lgc | 87 ++ .../gfx12/gfx1200wavematrix-load-wave64.lgc | 158 ++++ .../gfx12/gfx1200wavematrix-store-wave64.lgc | 154 ++++ lgc/test/shaderdb/gfx12/gfx1200wavematrix.lgc | 116 +++ lgc/test/shaderdb/gfx12/lit.local.cfg | 27 + .../gfx12/packed-accumulators-gfx12.lgc | 739 ++++++++++++++++ .../gfx12/s_buffer_load-conversion-gfx12.lgc | 336 ++++++++ lgc/util/GfxRegHandler.cpp | 50 +- lgc/util/RegStackUsage.cpp | 24 +- lgc/util/WorkgroupLayout.cpp | 6 +- llpc/context/llpcCompiler.cpp | 68 ++ llpc/context/llpcPipelineContext.cpp | 15 + llpc/context/llpcRayTracingContext.cpp | 13 +- llpc/context/llpcRayTracingContext.h | 9 +- llpc/lowering/LowerGlobals.cpp | 16 + llpc/lowering/ProcessGpuRtLibrary.cpp | 238 ++++++ llpc/lowering/ProcessGpuRtLibrary.h | 9 + .../bugs/ArrayOfVariablePointers.spvasm | 18 + .../bugs/PipelineCs_SpillThresholdEnable.pipe | 18 + .../core/FMA_TestOperandIsZero.spvasm | 18 + ...Array_Load_With_Array_Load_Result_lit.frag | 18 + .../core/OOB_Check_Dependent_Load_lit.frag | 18 + .../core/OOB_Check_Load_Array_Loop_lit.frag | 18 + .../core/OOB_Check_Load_Array_lit.frag | 18 + .../OOB_Check_Load_Array_with_Struct_lit.frag | 18 + .../OOB_Check_Load_Matrix_Vector_lit.frag | 18 + .../core/OOB_Check_Load_Matrix_lit.frag | 18 + .../OOB_Check_Load_Nested_Struct_lit.frag | 18 + .../core/OOB_Check_Load_Struct_lit.frag | 
18 + .../core/OOB_Check_Load_Vector_lit.frag | 18 + .../core/OOB_Check_Multiple_Load_lit.frag | 18 + .../core/OOB_Check_Optimization_lit.frag | 18 + .../core/OOB_Check_Store_Array_lit.frag | 18 + .../core/OOB_Check_Store_Struct_lit.frag | 18 + ...onUniform_TestTexutreLoadStoreInt64.spvasm | 18 + ...ccessChain_TestBlockVectorExtract_lit.frag | 18 + ...Chain_TestGeneralVarVectorExtract_lit.frag | 18 + ...ccessChain_TestInOutVectorExtract_lit.frag | 18 + ...AccessChain_TestMultiLevelChain_lit.spvasm | 18 + ..._TestOutBlockMemberLocUnspecified_lit.vert | 18 + ...in_TestRowMajorBlockVectorExtract_lit.frag | 18 + ...essChain_TestUniformVectorExtract_lit.frag | 18 + .../core/OpAll_TestBoolConst_lit.frag | 18 + .../shaderdb/core/OpAll_TestBvec4_lit.frag | 18 + .../core/OpAny_TestBoolConst_lit.frag | 18 + .../shaderdb/core/OpAny_TestBvec2_lit.frag | 18 + .../core/OpArrayLength_TestGeneral_lit.frag | 18 + ...OpAtomicAnd_TestInt64ImageAtomicAnd.spvasm | 18 + ...change_TestInt64ImageAtomicCompSwap.spvasm | 18 + ...icCompareExchange_TestStrongCompare.spvasm | 18 + ...change_TestInt64ImageAtomicExchange.spvasm | 18 + ...pAtomicIAdd_TestInt64ImageAtomicAdd.spvasm | 18 + ...ement_TestInt64ImageAtomicDecrement.spvasm | 18 + ...omicIDecrement_TestStorageBlock_lit.spvasm | 18 + ...ement_TestInt64ImageAtomicIncrement.spvasm | 18 + ...omicIIncrement_TestStorageBlock_lit.spvasm | 18 + ...cIIncrement_TestVariablePointer_lit.spvasm | 18 + ...pAtomicISub_TestInt64ImageAtomicSub.spvasm | 18 + ...AtomicLoad_TestInt64ImageAtomicLoad.spvasm | 18 + .../OpAtomicLoad_TestStorageBlock_lit.spvasm | 18 + .../OpAtomicOr_TestInt64ImageAtomicOr.spvasm | 18 + ...pAtomicSMax_TestInt64ImageAtomicMax.spvasm | 18 + ...pAtomicSMin_TestInt64ImageAtomicMin.spvasm | 18 + ...omicStore_TestInt64ImageAtomicStore.spvasm | 18 + .../OpAtomicStore_TestStorageBlock_lit.spvasm | 18 + ...pAtomicUMax_TestInt64ImageAtomicMax.spvasm | 18 + ...pAtomicUMin_TestInt64ImageAtomicMin.spvasm | 18 + 
.../core/OpAtomicXXX_TestImage_lit.frag | 18 + .../OpAtomicXXX_TestStorageBlock_lit.frag | 18 + ...OpAtomicXor_TestInt64ImageAtomicXor.spvasm | 18 + .../core/OpBitCount_TestIntConst_lit.frag | 18 + .../shaderdb/core/OpBitCount_TestInt_lit.frag | 18 + .../core/OpBitCount_TestIvec4_lit.frag | 18 + .../core/OpBitCount_TestUint_lit.frag | 18 + .../OpBitFieldInsert_TestIntConst_lit.frag | 18 + .../core/OpBitFieldInsert_TestInt_lit.frag | 18 + .../core/OpBitFieldInsert_TestIvec4_lit.frag | 18 + .../core/OpBitFieldInsert_TestUint_lit.frag | 18 + .../OpBitFieldSExtract_TestGeneral_lit.frag | 18 + .../OpBitFieldSExtract_TestIntConst_lit.frag | 18 + .../OpBitFieldUExtract_TestGeneral_lit.frag | 18 + .../core/OpBitFieldUExtract_TestUint_lit.frag | 18 + .../core/OpBitReverse_TestIntConst_lit.frag | 18 + .../core/OpBitReverse_TestInt_lit.frag | 18 + .../core/OpBitReverse_TestUint_lit.frag | 18 + .../core/OpBitcast_TestIvec3ToUvec3_lit.frag | 18 + .../core/OpBitcast_TestUintToInt_lit.frag | 18 + .../core/OpBitwiseAnd_TestUvec3_lit.frag | 18 + .../core/OpBitwiseOr_TestUvec3_lit.frag | 18 + .../core/OpBitwiseXor_TestUvec3_lit.frag | 18 + .../OpBranchConditional_TestBreakInLoop.frag | 18 + ...BranchConditional_TestComplexContinue.frag | 18 + ...pBranchConditional_TestContinueInLoop.frag | 18 + .../core/OpBranchConditional_TestDoWhile.frag | 18 + .../core/OpBranchConditional_TestIf.frag | 18 + .../core/OpBranchConditional_TestIfElse.frag | 18 + .../core/OpBranchConditional_TestLoop.frag | 18 + ...pBranchConditional_TestLoopContinue.spvasm | 18 + .../OpBranchConditional_TestLoopNoBody.spvasm | 18 + .../OpBranchConditional_TestSuccessiveIf.frag | 18 + .../core/OpBranch_TestEarlyReturn.frag | 18 + .../OpBranch_TestUnreachableContinue.frag | 18 + .../OpBranch_TestUnreachableSwitch.spvasm | 18 + .../OpBranch_TestUnsequentialBlock.spvasm | 18 + ...CompositeConstruct_TestArrayConstruct.frag | 18 + ...ompositeConstruct_TestMatrixConstruct.frag | 18 + 
...ompositeConstruct_TestStructConstruct.frag | 18 + ...CompositeConstruct_TestVecConstruct.spvasm | 18 + ...ompositeConstruct_TestVectorConstruct.frag | 18 + ...OpCompositeConstruct_TestVectorMatrix.frag | 18 + .../OpCompositeExtract_TestArrayExtract.frag | 18 + .../OpCompositeExtract_TestVectorExtract.frag | 18 + ...iteInsert_TestScalarInsertedToArray.spvasm | 18 + ...teInsert_TestScalarInsertedToMatrix.spvasm | 18 + .../OpCompositeInsert_TestVectorInsert.frag | 18 + .../OpConstantComposite_TestVectorMatrix.frag | 18 + .../core/OpConstantNull_TestScalar.spvasm | 18 + .../core/OpConstantNull_TestStruct.spvasm | 18 + .../OpConstantNull_TestVecMatArray.spvasm | 18 + ...pConstantNull_TestVectorMatrixArray.spvasm | 18 + .../OpConvertFToS_TestDoubleToInt_lit.frag | 18 + .../OpConvertFToS_TestVec4ToIvec4_lit.frag | 18 + .../OpConvertFToU_TestDvec3ToUvec3_lit.frag | 18 + .../OpConvertFToU_TestVec2ToUvec2_lit.frag | 18 + .../OpConvertSToF_TestIvec2ToVec2_lit.frag | 18 + .../OpConvertSToF_TestIvec4ToDvec4_lit.frag | 18 + .../OpConvertUToF_TestUvec3ToDvec3_lit.frag | 18 + .../OpConvertUToF_TestUvec3ToVec3_lit.frag | 18 + ...opyMemory_TestCopyLocalToOutput_lit.spvasm | 18 + ...yMemory_TestCopyUniformToOutput_lit.spvasm | 18 + ...opyMemory_TestExtraMemoryAccess_lit.spvasm | 18 + .../core/OpCopyMemory_TestStruct_lit.spvasm | 18 + .../core/OpCopyObject_TestNonUniform.spvasm | 18 + .../core/OpCopyObject_TestVec4_lit.spvasm | 18 + .../core/OpDPdx_TestFineCoarse_lit.frag | 18 + .../core/OpDPdy_TestFineCoarse_lit.frag | 18 + ...onGroup_TestGroupAndGroupMember_lit.spvasm | 18 + .../shaderdb/core/OpDot_TestDvec_lit.frag | 18 + .../shaderdb/core/OpDot_TestFloat_lit.frag | 18 + .../test/shaderdb/core/OpDot_TestVec_lit.frag | 18 + .../OpEmitStreamVertex_TestGeneral_lit.geom | 25 +- .../core/OpEmitVertex_TestGeneral_lit.geom | 25 +- ...ryPoint_TesListAllGlobalVariables_lit.frag | 18 + .../OpExecutionModeId_TestLocalSizeId.spvasm | 18 + .../OpExtInst_NMinNMaxNaNFlags_lit.spvasm | 18 + 
.../shaderdb/core/OpFAdd_TestMatrix_lit.frag | 18 + .../shaderdb/core/OpFAdd_TestVector_lit.frag | 18 + .../core/OpFConvert_TestDmat4ToMat4_lit.frag | 18 + .../OpFConvert_TestDoubleToFloat_lit.frag | 18 + .../OpFConvert_TestMat2X3ToDmat2X3_lit.frag | 18 + .../OpFConvert_TestRoundingModeRTN_lit.spvasm | 18 + .../OpFConvert_TestRoundingModeRTP_lit.spvasm | 18 + .../core/OpFConvert_TestVec3ToDvec3_lit.frag | 18 + .../shaderdb/core/OpFDiv_TestVector_lit.frag | 18 + .../shaderdb/core/OpFMod_TestDvec4_lit.frag | 18 + .../shaderdb/core/OpFMod_TestFloat_lit.frag | 18 + .../shaderdb/core/OpFMod_TestVec4_lit.frag | 18 + .../shaderdb/core/OpFMul_TestMatrix_lit.frag | 18 + .../core/OpFMul_TestOperandIsZero.spvasm | 18 + .../shaderdb/core/OpFMul_TestVector_lit.frag | 18 + .../core/OpFNegate_TestDvec3_lit.frag | 18 + .../core/OpFNegate_TestMat2X3_lit.frag | 18 + .../shaderdb/core/OpFNegate_TestVec3_lit.frag | 18 + .../core/OpFOrdEqual_TestVec3_lit.frag | 18 + .../OpFOrdGreaterThanEqual_TestFloat_lit.frag | 18 + .../OpFOrdGreaterThanEqual_TestVec3_lit.frag | 18 + .../core/OpFOrdGreaterThan_TestFloat_lit.frag | 18 + .../core/OpFOrdGreaterThan_TestVec3_lit.frag | 18 + .../OpFOrdLessThanEqual_TestFloat_lit.frag | 18 + .../OpFOrdLessThanEqual_TestVec3_lit.frag | 18 + .../core/OpFOrdLessThan_TestFloat_lit.frag | 18 + .../core/OpFOrdLessThan_TestVec3_lit.frag | 18 + .../core/OpFOrdNotEqual_TestVec3_lit.frag | 18 + .../core/OpFOrdSLessThan_TestFloat_lit.frag | 18 + .../OpFOrdULessThanEqual_TestFloat_lit.frag | 18 + .../core/OpFOrdULessThan_TestFloat_lit.frag | 18 + .../shaderdb/core/OpFSub_TestMatrix_lit.frag | 18 + .../shaderdb/core/OpFSub_TestVector_lit.frag | 18 + .../OpFunctionCall_TestArguTexArray_lit.frag | 18 + ...OpFunctionCall_TestManyParameters_lit.frag | 18 + ...ionCall_TestNumericReturnAndInout_lit.frag | 18 + .../OpFunctionCall_TestParamConst_lit.frag | 18 + ...OpFunctionCall_TestParamSimpleTex_lit.frag | 18 + .../OpFunctionCall_TestParamTexArray_lit.frag | 18 + 
...nctionCall_TestParamTexNestedCall_lit.frag | 18 + ...nctionCall_TestVoidReturnAndInout_lit.frag | 18 + .../core/OpFunction_TestDontInline.spvasm | 18 + ...OpFunction_TestInlineDontInline_lit.spvasm | 18 + .../core/OpFwidth_TestFineCoarse_lit.frag | 18 + ...pGroupNonUniformBroadcast_ToShuffle.spvasm | 18 + ...roupNonUniformBroadcast_ToWaterfall.spvasm | 18 + .../core/OpIAddCarry_TestGeneral_lit.frag | 18 + .../core/OpIAddCarry_TestInt_lit.frag | 18 + .../core/OpIAddCarry_TestUvec4_lit.frag | 18 + .../shaderdb/core/OpIAdd_TestVector_lit.frag | 18 + .../shaderdb/core/OpIEqual_TestIvec2_lit.frag | 18 + .../shaderdb/core/OpIMul_TestVector_lit.frag | 18 + .../core/OpINotEqual_TestIvec2_lit.frag | 18 + .../core/OpINotEqual_TestSignMatch_lit.vert | 18 + .../OpINotEqual_TestSignedUnsigned_lit.frag | 18 + .../core/OpISubBorrow_TestGeneral_lit.frag | 18 + .../core/OpISubBorrow_TestInt_lit.frag | 18 + .../shaderdb/core/OpISub_TestVector_lit.frag | 18 + .../core/OpImageDrefGather_TestBasic_lit.frag | 18 + .../OpImageDrefGather_TestOffset_lit.frag | 18 + ...refGather_TestTextureGatherOffset_lit.frag | 18 + ...efGather_TestTextureGatherOffsets_lit.frag | 18 + ...ImageDrefGather_TestTextureGather_lit.frag | 18 + ...mageExplicitLod_TestDrefLodOffset_lit.frag | 18 + ..._Test2DMSArray_disableShadowTable_lit.frag | 18 + .../core/OpImageFetch_Test2DMSArray_lit.frag | 18 + .../core/OpImageFetch_Test2DMS_lit.frag | 18 + .../core/OpImageFetch_TestBasic_lit.frag | 18 + .../OpImageFetch_TestDynamicOffset.spvasm | 18 + .../OpImageFetch_TestIntegerSampler_lit.frag | 18 + .../core/OpImageFetch_TestOffset_lit.frag | 18 + ...OpImageFetch_TestTexelFetchOffset_lit.frag | 18 + .../core/OpImageFetch_TestTexelFetch_lit.frag | 18 + .../core/OpImageGather_TestBasic_lit.frag | 18 + .../OpImageGather_TestConstOffsets_lit.frag | 18 + ...pImageGather_TestDrefConstOffsets_lit.frag | 18 + .../OpImageGather_TestIntegerSampler.frag | 18 + .../core/OpImageGather_TestOffset_lit.frag | 18 + 
...geGather_TestTextureGatherBiasLod_lit.frag | 18 + ...ageGather_TestTextureGatherOffset_lit.frag | 18 + ...geGather_TestTextureGatherOffsets_lit.frag | 18 + .../OpImageGather_TestTextureGather_lit.frag | 18 + ...ueryLevels_TestTextureQueryLevels_lit.frag | 18 + .../core/OpImageQueryLod_TestBasic_lit.frag | 18 + ...ImageQueryLod_TestTextureQueryLod_lit.frag | 18 + ...mageQuerySamples_TestImageSamples_lit.frag | 18 + ...geQuerySamples_TestTextureSamples_lit.frag | 18 + ...ImageQuerySizeLod_TestTextureSize_lit.frag | 18 + .../core/OpImageQuerySize_TestBasic_lit.frag | 18 + .../OpImageQuerySize_TestImageSize_lit.frag | 18 + .../OpImageQuerySize_TestSeparated_lit.frag | 18 + .../OpImageQuerySize_TestTextureSize_lit.frag | 18 + .../core/OpImageRead_TestImageLoad_lit.frag | 18 + .../OpImageRead_TestInt64ImageLoad.spvasm | 18 + .../OpImageRead_TestNonVec4Data_lit.spvasm | 18 + .../OpImageRead_TestSubpassInput_lit.frag | 18 + ...mageSampleDrefExplicitLod_TestDrefLod.frag | 18 + ...SampleDrefExplicitLod_TestTextureGrad.frag | 18 + ...eDrefExplicitLod_TestTextureGradClamp.frag | 18 + ...DrefExplicitLod_TestTextureGradOffset.frag | 18 + ...eSampleDrefExplicitLod_TestTextureLod.frag | 18 + ...eDrefExplicitLod_TestTextureLodOffset.frag | 18 + ...OpImageSampleDrefImplicitLod_TestDref.frag | 18 + ...ageSampleDrefImplicitLod_TestDrefBias.frag | 18 + ...efImplicitLod_TestImageWithoutDepth.spvasm | 18 + ...mageSampleDrefImplicitLod_TestTexture.frag | 18 + ...eDrefImplicitLod_TestTextureBiasClamp.frag | 18 + ...ampleDrefImplicitLod_TestTextureClamp.frag | 18 + ...eDrefImplicitLod_TestTextureGradClamp.frag | 18 + ...mplicitLod_TestTextureGradOffsetClamp.frag | 18 + ...mpleDrefImplicitLod_TestTextureOffset.frag | 18 + ...refImplicitLod_TestTextureOffsetClamp.frag | 18 + .../OpImageSampleExplicitLod_TestLod_lit.frag | 18 + ...eExplicitLod_TestTextureGradClamp_lit.frag | 18 + ...ExplicitLod_TestTextureGradOffset_lit.frag | 18 + ...SampleExplicitLod_TestTextureGrad_lit.frag | 18 + 
...eExplicitLod_TestTextureLodOffset_lit.frag | 18 + ...eSampleExplicitLod_TestTextureLod_lit.frag | 18 + ...mageSampleImplicitLod_Test1DArray_lit.frag | 18 + .../OpImageSampleImplicitLod_Test1D_lit.frag | 18 + ...mageSampleImplicitLod_Test2DArray_lit.frag | 18 + ...ImageSampleImplicitLod_Test2DRect_lit.frag | 18 + .../OpImageSampleImplicitLod_Test3D_lit.frag | 18 + ...ImplicitLod_TestArrayDirectAccess_lit.frag | 18 + ...pImageSampleImplicitLod_TestBasic_lit.frag | 18 + ...OpImageSampleImplicitLod_TestBias_lit.frag | 18 + ...geSampleImplicitLod_TestCubeArray_lit.frag | 18 + ...eSampleImplicitLod_TestCubeShadow_lit.frag | 18 + ...OpImageSampleImplicitLod_TestCube_lit.frag | 18 + ...ageSampleImplicitLod_TestDrefGrad_lit.frag | 18 + ...OpImageSampleImplicitLod_TestGrad_lit.frag | 18 + ...pleImplicitLod_TestIntegerSampler_lit.frag | 18 + ...Lod_TestMultiDimArrayDirectAccess_lit.frag | 18 + ...ImageSampleImplicitLod_TestOffset_lit.frag | 18 + ...mplicitLod_TestProjDrefGradOffset_lit.frag | 18 + ...ageSampleImplicitLod_TestSeparate_lit.frag | 18 + ...licitLod_TestSignExtendZeroExtend_lit.frag | 18 + ...eImplicitLod_TestTextureBiasClamp_lit.frag | 18 + ...ampleImplicitLod_TestTextureClamp_lit.frag | 18 + ...eImplicitLod_TestTextureGradClamp_lit.frag | 18 + ...citLod_TestTextureGradOffsetClamp_lit.frag | 18 + ...mplicitLod_TestTextureOffsetClamp_lit.frag | 18 + ...mpleImplicitLod_TestTextureOffset_lit.frag | 18 + ...mageSampleImplicitLod_TestTexture_lit.frag | 18 + ...leProjDrefExplicitLod_TestProjDrefLod.frag | 18 + ...ojDrefExplicitLod_TestTextureProjGrad.frag | 18 + ...ExplicitLod_TestTextureProjGradOffset.frag | 18 + ...rojDrefExplicitLod_TestTextureProjLod.frag | 18 + ...fExplicitLod_TestTextureProjLodOffset.frag | 18 + ...ampleProjDrefImplicitLod_TestProjComp.frag | 18 + ...ampleProjDrefImplicitLod_TestProjDref.frag | 18 + ...leProjDrefImplicitLod_TestTextureProj.frag | 18 + ...pImageSampleProjDref_TestProjDrefBias.frag | 18 + 
...mageSampleProjExplicitLod_TestProjLod.frag | 18 + ...leProjExplicitLod_TestTextureProjGrad.frag | 18 + ...ExplicitLod_TestTextureProjGradOffset.frag | 18 + ...pleProjExplicitLod_TestTextureProjLod.frag | 18 + ...jExplicitLod_TestTextureProjLodOffset.frag | 18 + ...OpImageSampleProjImplicitLod_TestProj.frag | 18 + ...ageSampleProjImplicitLod_TestProjBias.frag | 18 + ...SampleProjImplicitLod_TestTextureProj.frag | 18 + ...ProjImplicitLod_TestTextureProjOffset.frag | 18 + ...xplicitLod_TestTextureInNonFragShader.vert | 18 + .../OpImageSample_TestSeparateSampler.pipe | 18 + ...OpImageSample_TestSeparateSampler_lit.frag | 18 + ...rseDrefGather_TestSparseTextureGather.frag | 18 + ...eSparseGather_TestSparseTextureGather.frag | 18 + ...Gather_TestSparseTextureGatherBiasLod.frag | 18 + ...SparseRead_TestInt64SparseImageLoad.spvasm | 18 + ...OpImageSparseRead_TestSparseImageLoad.frag | 18 + ...xplicitLod_TestSparseTextureGradClamp.frag | 18 + ...eDrefExplicitLod_TestSparseTextureLod.frag | 18 + ...mpleDrefImplicitLod_TestSparseTexture.frag | 18 + ...mplicitLod_TestSparseTextureBiasClamp.frag | 18 + ...refImplicitLod_TestSparseTextureClamp.frag | 18 + ...mplicitLod_TestSparseTextureGradClamp.frag | 18 + ...tLod_TestSparseTextureGradOffsetClamp.frag | 18 + ...licitLod_TestSparseTextureOffsetClamp.frag | 18 + ...mpleExplicitLod_TestSparseTextureGrad.frag | 18 + ...xplicitLod_TestSparseTextureGradClamp.frag | 18 + ...ampleExplicitLod_TestSparseTextureLod.frag | 18 + ...seSampleImplicitLod_TestSparseTexture.frag | 18 + ...mplicitLod_TestSparseTextureBiasClamp.frag | 18 + ...pleImplicitLod_TestSparseTextureClamp.frag | 18 + ...mplicitLod_TestSparseTextureGradClamp.frag | 18 + ...tLod_TestSparseTextureGradOffsetClamp.frag | 18 + ...licitLod_TestSparseTextureOffsetClamp.frag | 18 + ...ImageSparseTexelsResident_TestGeneral.frag | 18 + .../OpImageWrite_TestBufferNonVec4Data.spvasm | 18 + .../core/OpImageWrite_TestImageStore.frag | 18 + .../OpImageWrite_TestInt64ImageStore.spvasm | 
18 + .../OpImageWrite_TestIntImage_Aliased.spvasm | 18 + .../core/OpImageWrite_TestNonVec4Data.spvasm | 18 + .../shaderdb/core/OpIsInf_TestDouble_lit.frag | 18 + .../shaderdb/core/OpIsInf_TestFloat_lit.frag | 18 + .../shaderdb/core/OpIsNan_TestDvec2_lit.frag | 18 + .../shaderdb/core/OpIsNan_TestFloat_lit.frag | 18 + .../shaderdb/core/OpIsNan_TestVec4_lit.frag | 18 + .../core/OpKill_TestFunctionBranch_lit.spvasm | 18 + .../core/OpKill_TestFunctionDynamic_lit.frag | 18 + .../OpKill_TestFunctionInlineReturn_lit.frag | 18 + .../core/OpKill_TestFunctionInline_lit.frag | 18 + .../OpKill_TestFunctionUnreachable_lit.spvasm | 18 + .../shaderdb/core/OpKill_TestGeneral_lit.frag | 18 + .../shaderdb/core/OpLine_TestGeneral.spvasm | 18 + .../core/OpLoad_TestAggregate_lit.frag | 18 + .../shaderdb/core/OpLoad_TestMatrix_lit.frag | 18 + .../shaderdb/core/OpLogicalAnd_TestBvec4.frag | 18 + .../core/OpLogicalEqual_TestGeneral.frag | 18 + .../OpLogicalNotEqual_TestGeneral_lit.frag | 18 + .../shaderdb/core/OpLogicalNot_TestBasic.frag | 18 + .../core/OpLogicalNot_TestBool_lit.frag | 18 + .../shaderdb/core/OpLogicalOr_TestBvec2.frag | 18 + .../OpLoopMerge_TestDependencyLength.spvasm | 18 + .../core/OpLoopMerge_TestDontUnroll.spvasm | 18 + ...OpLoopMerge_TestIterationControls_lit.frag | 18 + .../core/OpLoopMerge_TestPartialCount.spvasm | 18 + ...MatrixTimesMatrix_TestDmat2xDmat2_lit.frag | 18 + ...ixTimesMatrix_TestDmat4X3xDmat3X4_lit.frag | 18 + ...trixTimesMatrix_TestMat2X3xMat4X3_lit.frag | 18 + ...OpMatrixTimesMatrix_TestMat2xMat2_lit.frag | 18 + ...OpMatrixTimesMatrix_TestMat3xMat3_lit.frag | 18 + ...OpMatrixTimesMatrix_TestMat4xMat4_lit.frag | 18 + ...atrixTimesScalar_TestDmat3xDouble_lit.frag | 18 + ...atrixTimesScalar_TestDoublexDmat4_bit.frag | 18 + ...atrixTimesScalar_TestMat3X4xFloat_lit.frag | 18 + ...TimesScalar_TestMat4X2xConstFloat_lit.frag | 18 + ...trixTimesVector_TestDmat2X3xDvec2_lit.frag | 18 + ...MatrixTimesVector_TestDmat2xDvec2_lit.frag | 18 + 
...trixTimesVector_TestDmat4X2xDvec4_lit.frag | 18 + ...MatrixTimesVector_TestMat3X4xVec4_lit.frag | 18 + ...OpMatrixTimesVector_TestMat3xVec3_lit.frag | 18 + ...ryBarrier_TestMemoryBarrierBuffer_lit.frag | 18 + ...oryBarrier_TestMemoryBarrierImage_lit.frag | 18 + .../core/OpModuleProcessed_TestGeneral.spvasm | 18 + .../shaderdb/core/OpNoLine_TestGeneral.spvasm | 18 + .../shaderdb/core/OpNop_TestGeneral.spvasm | 18 + .../shaderdb/core/OpNot_TestUint_lit.frag | 18 + .../OpOuterProduct_TestDvec3xDvec2_lit.frag | 18 + .../OpOuterProduct_TestVec2xVec2_lit.frag | 18 + .../OpOuterProduct_TestVec2xVec4_lit.frag | 18 + .../OpOuterProduct_TestVec3xVec2_lit.frag | 18 + .../OpOuterProduct_TestVec3xVec4_lit.frag | 18 + .../core/OpPhi_Switch_FunctionCall_Phi.spvasm | 18 + .../OpPhi_TestMultiIncomingFromSwitch.spvasm | 18 + .../core/OpPhi_TestPhiInSelfLoop.spvasm | 18 + .../shaderdb/core/OpPtrDiff_Buffer_mem.spvasm | 18 + .../OpPtrDiff_TestVariablePointers.spvasm | 18 + .../core/OpPtrDiff_Workgroup_mem.spvasm | 18 + llpc/test/shaderdb/core/OpPtrEqualTest.spvasm | 18 + .../core/OpPtrEqual_TestNullPointerCmp.spvasm | 18 + .../OpPtrNotEqual_TestWorkgroupCmp.spvasm | 18 + .../OpQuantizeToF16_TestGeneral_lit.spvasm | 18 + .../OpReturnValue_TestEarlyReturn_lit.frag | 18 + ...eturnValue_TestReturnInNestedLoop_lit.frag | 18 + .../shaderdb/core/OpSDiv_TestIvec2_lit.frag | 18 + .../core/OpSDotAccSat_TestIVec.spvasm | 18 + .../core/OpSDotAccSat_TestIVec16bit.spvasm | 18 + .../test/shaderdb/core/OpSDot_TestIVec.spvasm | 18 + ...erThanEqual_TestSignedAndUnsigned_lit.frag | 18 + ...GreaterThan_TestSignedAndUnsigned_lit.frag | 18 + ...ssThanEqual_TestSignedAndUnsigned_lit.frag | 18 + ...OpSLessThan_TestSignedAndUnsigned_lit.frag | 18 + .../shaderdb/core/OpSMod_TestInt_lit.frag | 18 + .../shaderdb/core/OpSMod_TestIvec2_lit.frag | 18 + .../core/OpSMulExtended_TestGeneral_lit.frag | 18 + .../core/OpSMulExtended_TestInt_lit.frag | 18 + .../shaderdb/core/OpSNegate_TestInt_lit.frag | 18 + 
.../core/OpSNegate_TestUvec2_lit.frag | 18 + .../core/OpSUDotAccSat_TestIUVec.spvasm | 18 + .../shaderdb/core/OpSUDot_TestSIVec.spvasm | 18 + .../core/OpSelect_TestDescriptorArray.spvasm | 18 + .../core/OpSelect_TestGeneral_lit.frag | 18 + ...OpSelect_TestSelectBetweenObjects_lit.frag | 18 + .../core/OpSelect_TestSharedVariable.spvasm | 18 + ...pSelectionMerge_TestDontFlatten_lit.spvasm | 18 + .../OpSelectionMerge_TestFlatten_lit.spvasm | 18 + .../OpShiftLeftLogical_TestIvec2_lit.frag | 18 + .../OpShiftLeftLogical_TestUvec3_lit.frag | 18 + .../OpShiftRightArithmetic_TestIvec4_lit.frag | 18 + .../OpShiftRightLogical_TestUvec3_lit.frag | 18 + .../shaderdb/core/OpShiftXXX_TestInt_lit.frag | 18 + .../core/OpShiftXXX_TestUInt_lit.frag | 18 + .../core/OpShift_Testi16shift64_lit.spvasm | 18 + .../core/OpShift_Testi32shift64_lit.spvasm | 18 + .../core/OpShift_Testi64shift16_lit.spvasm | 18 + .../OpSourceContinued_TestNormalString.spvasm | 18 + .../core/OpSource_TestSourceString.spvasm | 18 + .../core/OpSource_TestUnknownLang.spvasm | 18 + ...OpSpecConstantOp_TestArithLogicOp_lit.frag | 18 + ...ecConstantOp_TestCompositeExtract_lit.frag | 18 + ...cConstantOp_TestCompositeInsert_lit.spvasm | 18 + ...pecConstantOp_TestNestedSpecConstOp.spvasm | 18 + ...onstantOp_TestQuantizeFlushToZero_lit.pipe | 18 + ...pecConstantOp_TestQuantizeToF16_lit.spvasm | 18 + .../OpSpecConstantOp_TestVectorRelated.spvasm | 18 + .../OpSpecConstantOp_TestVectorShuffle.frag | 18 + .../shaderdb/core/OpStore_TestMatrix_lit.frag | 18 + .../OpSwitch_Test64BitCaseLabel_lit.spvasm | 18 + .../core/OpSwitch_TestFallThrough_lit.frag | 18 + .../core/OpSwitch_TestGeneral_lit.frag | 18 + .../core/OpSwitch_TestMergedBranches_lit.frag | 18 + .../core/OpTranspose_TestDmat2X3_lit.frag | 18 + .../core/OpTranspose_TestMat2X3_lit.frag | 18 + .../core/OpTranspose_TestMat3X4_lit.frag | 18 + .../core/OpTranspose_TestMat4_lit.frag | 18 + ...peSampledImage_TestWaterfallInsertion.frag | 18 + 
...peSampledImage_TestWaterfallScalarize.frag | 18 + ...Image_TestWaterfallScalarizeVgprLimit.frag | 18 + ...age_TestWaterfallScalarize_MultiBlock.frag | 18 + ...age_TestWaterfallScalarize_SharedDesc.frag | 18 + llpc/test/shaderdb/core/OpUDiv_TestUvec3.frag | 18 + .../shaderdb/core/OpUDiv_TestUvec3_lit.frag | 18 + .../core/OpUDotAccSat_TestUVec.spvasm | 18 + .../core/OpUDotAccSat_TestUVec16bit.spvasm | 18 + .../test/shaderdb/core/OpUDot_TestUVec.spvasm | 18 + llpc/test/shaderdb/core/OpUMod_TestUInt.frag | 18 + .../shaderdb/core/OpUMod_TestUInt_lit.frag | 18 + .../shaderdb/core/OpUMod_TestUintConst.frag | 18 + .../core/OpUMod_TestUintConst_lit.frag | 18 + llpc/test/shaderdb/core/OpUMod_TestUvec3.frag | 18 + .../shaderdb/core/OpUMod_TestUvec3_lit.frag | 18 + .../core/OpUMulExtended_TestUint.frag | 18 + .../core/OpUMulExtended_TestUint_lit.frag | 18 + .../core/OpUmulExtended_TestGeneral.frag | 18 + .../core/OpUmulExtended_TestGeneral_lit.frag | 18 + .../core/OpUndef_TestRuntimeArray.spvasm | 18 + .../core/OpUndef_TestScalarArray.spvasm | 18 + .../core/OpUndef_TestUndefImage.spvasm | 18 + .../core/OpUndef_TestUndefImage_lit.spvasm | 18 + .../core/OpUnreachable_TestGeneral.spvasm | 18 + .../core/OpUnreachable_TestGeneral_lit.spvasm | 18 + .../core/OpVariable_TestInitializer.spvasm | 18 + .../OpVariable_TestInitializer_lit.spvasm | 18 + .../OpVectorExtractDynamic_TestDvec3.frag | 18 + .../OpVectorExtractDynamic_TestDvec3_lit.frag | 18 + ...pVectorExtractDynamic_TestUintIndex.spvasm | 18 + ...torExtractDynamic_TestUintIndex_lit.spvasm | 18 + .../core/OpVectorInsertDynamic_TestDvec2.frag | 18 + .../OpVectorInsertDynamic_TestDvec2_lit.frag | 18 + .../core/OpVectorInsertDynamic_TestVec4.frag | 18 + .../OpVectorInsertDynamic_TestVec4_lit.frag | 18 + ...ffle_TestDifferentInputVecSizes_lit.spvasm | 18 + .../core/OpVectorShuffle_TestDvec.frag | 18 + ...ectorShuffle_TestDvec4UndefVariable.spvasm | 18 + ...rShuffle_TestDvec4UndefVariable_lit.spvasm | 18 + 
...Shuffle_TestDvec4UnspecifiedChannel.spvasm | 18 + ...fle_TestDvec4UnspecifiedChannel_lit.spvasm | 18 + .../core/OpVectorShuffle_TestDvec_lit.frag | 18 + .../core/OpVectorShuffle_TestVec.frag | 18 + ...rShuffle_TestVec4UnspecifiedChannel.spvasm | 18 + ...ffle_TestVec4UnspecifiedChannel_lit.spvasm | 18 + .../core/OpVectorShuffle_TestVec_lit.frag | 18 + ...OpVectorTimesMatrix_TestDvec2xDmat4X2.frag | 18 + ...ctorTimesMatrix_TestDvec2xDmat4X2_lit.frag | 18 + ...OpVectorTimesMatrix_TestDvec3xDmat2X3.frag | 18 + ...ctorTimesMatrix_TestDvec3xDmat2X3_lit.frag | 18 + ...OpVectorTimesMatrix_TestDvec3xDmat4X3.frag | 18 + ...ctorTimesMatrix_TestDvec3xDmat4X3_lit.frag | 18 + .../OpVectorTimesMatrix_TestVec2xMat3X2.frag | 18 + ...VectorTimesMatrix_TestVec2xMat3X2_lit.frag | 18 + .../OpVectorTimesMatrix_TestVec4xMat4.frag | 18 + ...OpVectorTimesMatrix_TestVec4xMat4_lit.frag | 18 + .../OpVectorTimesScalar_TestDoublexDvec4.frag | 18 + ...ectorTimesScalar_TestDoublexDvec4_lit.frag | 18 + .../OpVectorTimesScalar_TestDvec4xDouble.frag | 18 + ...ectorTimesScalar_TestDvec4xDouble_lit.frag | 18 + .../OpVectorTimesScalar_TestIvec2xInt.frag | 18 + ...OpVectorTimesScalar_TestIvec2xInt_lit.frag | 18 + .../OpVectorTimesScalar_TestUvec4xUint.frag | 18 + ...pVectorTimesScalar_TestUvec4xUint_lit.frag | 18 + ...VectorTimesScalar_TestVec3xConstFloat.frag | 18 + ...orTimesScalar_TestVec3xConstFloat_lit.frag | 18 + .../OverrideThreadGroupSize16X16X1.spvasm | 18 + .../core/OverrideThreadGroupSize8X8X1.spvasm | 18 + .../test/shaderdb/core/ShaderRetInLoop.spvasm | 18 + .../TestEnableImplicitInvariantExports.vert | 18 + .../TestForceNonUniformResourceIndex.frag | 18 + .../TestNoContractBackwardPropagation.spvasm | 18 + .../TestNoContractForwardPropagation.spvasm | 18 + .../shaderdb/core/TestXfbStateMetadata.vert | 18 + .../shaderdb/debug_info/FunctionCall.pipe | 18 + .../debug_info/NonSemanticShaderDebug.pipe | 18 + ...PipelineGsTess_TestVsTesGsMergeShader.pipe | 18 + 
.../PipelineGs_TestVsGSMergeShader.pipe | 18 + .../DebugInfo_DebugCompilationUnit.spvasm | 18 + .../avoid/DebugInfo_DebugDeclare.spvasm | 18 + .../avoid/DebugInfo_DebugExpression.spvasm | 18 + .../DebugInfo_DebugFunctionDeclaration.spvasm | 18 + .../avoid/DebugInfo_DebugLexicalBlock.spvasm | 18 + .../avoid/DebugInfo_DebugSourceNoText.spvasm | 18 + .../avoid/DebugInfo_DebugTypeArray.spvasm | 18 + .../avoid/DebugInfo_DebugTypeEnum.spvasm | 18 + .../avoid/DebugInfo_DebugTypeFunction.spvasm | 18 + .../DebugInfo_DebugTypeInheritance.spvasm | 18 + .../avoid/DebugInfo_DebugTypePointer.spvasm | 18 + .../avoid/DebugInfo_DebugTypeQualifier.spvasm | 18 + .../avoid/DebugInfo_DebugTypeVector.spvasm | 18 + .../avoid/DebugInfo_DebugTypedef.spvasm | 18 + .../avoid/DebugInfo_TestFsBasic.frag | 18 + .../avoid/DebugInfo_TestVsBasic.vert | 18 + .../GlslBadEntryPointName.frag | 18 + .../error_reporting/GlslDuplicateStage.frag | 18 + .../error_reporting/InvalidGfxip.frag | 18 + .../MultipleThreadsVerboseOutput.spvasm | 18 + .../error_reporting/SpirvBadEntryPoint.spvasm | 18 + .../SpirvDuplicateStage.spvasm | 18 + .../error_reporting/SpirvInvalidOpcode.spvasm | 18 + .../SpirvMissingEntryPoint.spvasm | 18 + .../SpirvValidationFailure.spvasm | 18 + .../SpirvWildcardAndEntryPoint.spvasm | 18 + .../Ext16bitStorage_TestFpRoundMode.spvasm | 18 + .../Ext16bitStorage_TestFsInput_lit.frag | 18 + .../Ext16bitStorage_TestGsInput_lit.geom | 25 +- .../Ext16bitStorage_TestGsOutput_lit.geom | 25 +- .../Ext16bitStorage_TestTcsInput_lit.tesc | 25 +- .../Ext16bitStorage_TestTcsOutput_lit.tesc | 25 +- .../Ext16bitStorage_TestTesInput_lit.tese | 25 +- .../Ext16bitStorage_TestTesOutput_lit.tese | 25 +- .../Ext16bitStorage_TestVsInput_lit.vert | 18 + .../Ext16bitStorage_TestVsOutput_lit.vert | 18 + ...ExtBufferReference_TestPointerCasting.frag | 18 + .../ExtDemoteToHelper_TestDemote.frag | 18 + ...DemoteToHelper_TestIsHelperInvocation.frag | 18 + ...ExtDeviceGroup_TestGraphicsShader_lit.vert | 18 + 
...xtExplicitVertexParam_TestBuiltIn_lit.frag | 18 + ...xplicitVertexParam_TestInterpFunc_lit.frag | 18 + .../ExtFragMask_TestFragFetch_lit.frag | 18 + .../ExtGcnShader_TestBuiltInFunc_lit.frag | 18 + .../ExtGoogleHlslFunc_TestGeneral.spvasm | 18 + .../ExtMultiView_TestSubpassLoad_lit.pipe | 18 + ...ExtShaderBallot_TestArithmeticAMD_lit.frag | 18 + ...tShaderBallot_TestArithmeticData16AMD.frag | 18 + .../ExtShaderBallot_TestGeneral_lit.frag | 18 + .../ExtShaderBallot_TestMiscAMD_lit.frag | 18 + .../ExtShaderBallot_TestSwizzleAMD_lit.frag | 18 + ...ExtShaderFloat16Fetch_TestFetchData16.frag | 18 + ...xtShaderFloat16Fetch_TestGatherData16.frag | 18 + ...tShaderFloat16Fetch_TestImagingData16.frag | 18 + ...ShaderFloat16Fetch_TestSamplingData16.frag | 18 + ...loat16Fetch_TestSubpassSamplingData16.frag | 18 + ...tShaderFloat16_TestAngleTrigFuncs_lit.frag | 18 + ...ExtShaderFloat16_TestArithmeticOp_lit.frag | 18 + .../ExtShaderFloat16_TestCommonFuncs_lit.frag | 18 + .../ExtShaderFloat16_TestDerivFuncs_lit.frag | 18 + ...haderFloat16_TestExponentialFuncs_lit.frag | 18 + ...xtShaderFloat16_TestGeometryFuncs_lit.frag | 18 + .../ExtShaderFloat16_TestInterpFuncs_lit.frag | 18 + .../ExtShaderFloat16_TestMatrixFuncs_lit.frag | 18 + ...ShaderFloat16_TestPackUnpackFuncs_lit.frag | 18 + ...ShaderFloat16_TestRelationalFuncs_lit.frag | 18 + .../ExtShaderFloat16_TestSpecConst.frag | 18 + ...tShaderFloat16_TestTrinaryMinMaxFuncs.frag | 18 + .../ExtShaderFloat16_TestVectorMatrixOp.frag | 18 + .../ExtShaderInt16_TestBasicArithInt16.frag | 18 + .../ExtShaderInt16_TestBasicArithUint16.frag | 18 + .../ExtShaderInt16_TestBuiltInFuncFrexp.frag | 18 + .../ExtShaderInt16_TestSpecConst.frag | 18 + ...ExtShaderInt16_TestTrinaryMinMaxFuncs.frag | 18 + .../ExtShaderInt64_TestArithmeticOp_lit.frag | 18 + .../ExtShaderInt64_TestBitwiseOp_lit.frag | 18 + .../ExtShaderInt64_TestBuiltInFunc_lit.frag | 18 + .../ExtShaderInt64_TestRelationalOp_lit.frag | 18 + .../ExtShaderInt64_TestShiftOp_lit.frag | 18 
+ .../ExtShaderInt64_TestTypeCast_lit.frag | 18 + .../ExtShaderInt8_TestBasicArithInt8.frag | 18 + .../ExtShaderInt8_TestBasicArithUint8.frag | 18 + .../extensions/ExtShaderInt8_TestFsInOut.frag | 18 + .../extensions/ExtShaderInt8_TestGsInOut.geom | 25 +- .../ExtShaderInt8_TestTcsInOut.tesc | 25 +- .../ExtShaderInt8_TestTesInOut.tese | 25 +- .../ExtShaderInt8_TestVsInOut_lit.vert | 18 + .../ExtShaderVote_TestGeneral_lit.frag | 18 + ...ubgroupQuad_TestSubgroupQuadBroadcast.frag | 18 + ...roupQuad_TestSubgroupQuadSwapDiagonal.frag | 18 + ...roupQuad_TestSubgroupQuadSwapVertical.frag | 18 + .../ExtTrinaryMinMax_TestGeneral_lit.frag | 18 + .../ExtXfb_TessGsDoubleOutput_lit.geom | 25 +- .../ExtXfb_TestGsFloatOutput_lit.geom | 25 +- .../ExtXfb_TestNoXfbExecutionMode.spvasm | 18 + .../ExtXfb_TestTesDoubleOutput_lit.tese | 25 +- .../ExtXfb_TestTesFloatOutput_lit.tese | 25 +- .../ExtXfb_TestVsDoubleOutput_lit.vert | 18 + .../ExtXfb_TestVsFloatOutput_lit.vert | 18 + ...ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag | 18 + .../extensions/OpExtInst_TestAbsDouble.frag | 18 + .../extensions/OpExtInst_TestAbsFloat.frag | 18 + .../extensions/OpExtInst_TestAbsInt.frag | 18 + .../extensions/OpExtInst_TestAbsIvec4.frag | 18 + .../extensions/OpExtInst_TestAbsVec4.frag | 18 + .../extensions/OpExtInst_TestAcos.frag | 18 + .../extensions/OpExtInst_TestAcosFloat.frag | 18 + .../extensions/OpExtInst_TestAcosh.frag | 18 + .../extensions/OpExtInst_TestAcoshFloat.frag | 18 + .../extensions/OpExtInst_TestAsin.frag | 18 + .../extensions/OpExtInst_TestAsinFloat.frag | 18 + .../extensions/OpExtInst_TestAsinh.frag | 18 + .../extensions/OpExtInst_TestAsinhFloat.frag | 18 + .../extensions/OpExtInst_TestAtan.frag | 18 + .../extensions/OpExtInst_TestAtan2.frag | 18 + .../extensions/OpExtInst_TestAtan2Float.frag | 18 + .../extensions/OpExtInst_TestAtanFloat.frag | 18 + .../extensions/OpExtInst_TestAtanh.frag | 18 + .../extensions/OpExtInst_TestAtanhFloat.frag | 18 + 
.../extensions/OpExtInst_TestCeilDouble.frag | 18 + .../extensions/OpExtInst_TestCeilFloat.frag | 18 + .../OpExtInst_TestCeilVec4Const.frag | 18 + .../extensions/OpExtInst_TestClampBasic.frag | 18 + .../extensions/OpExtInst_TestClampDouble.frag | 18 + .../extensions/OpExtInst_TestClampFloat.frag | 18 + .../extensions/OpExtInst_TestClampInt.frag | 18 + .../extensions/OpExtInst_TestClampUint.frag | 18 + .../extensions/OpExtInst_TestCos.frag | 18 + .../OpExtInst_TestCosVec4Const.frag | 18 + .../extensions/OpExtInst_TestCosh.frag | 18 + .../extensions/OpExtInst_TestCoshFloat.frag | 18 + .../extensions/OpExtInst_TestCrossDouble.frag | 18 + .../extensions/OpExtInst_TestCrossFloat.frag | 18 + .../extensions/OpExtInst_TestCrossVec4.frag | 18 + .../extensions/OpExtInst_TestDegrees.frag | 18 + .../OpExtInst_TestDegreesVec4Const.frag | 18 + .../OpExtInst_TestDeterminantDmat.frag | 18 + .../OpExtInst_TestDeterminantMat.frag | 18 + .../OpExtInst_TestDeterminantMat2.frag | 18 + .../OpExtInst_TestDeterminantMat4.frag | 18 + .../OpExtInst_TestDistanceBasic.frag | 18 + .../OpExtInst_TestDistanceDouble.frag | 18 + .../OpExtInst_TestDistanceFloat.frag | 18 + .../OpExtInst_TestDistanceVec4.frag | 18 + .../extensions/OpExtInst_TestExp.frag | 18 + .../extensions/OpExtInst_TestExp2.frag | 18 + .../OpExtInst_TestExp2Vec4Const.frag | 18 + .../OpExtInst_TestExpVec4Const.frag | 18 + .../extensions/OpExtInst_TestFaceForward.frag | 18 + .../OpExtInst_TestFaceForwardDouble.frag | 18 + .../OpExtInst_TestFaceForwardVec4.frag | 18 + .../extensions/OpExtInst_TestFindILsbInt.frag | 18 + .../OpExtInst_TestFindILsbUint.frag | 18 + .../extensions/OpExtInst_TestFindLsbInt.frag | 18 + .../extensions/OpExtInst_TestFindMsbInt.frag | 18 + .../extensions/OpExtInst_TestFindMsbUint.frag | 18 + .../extensions/OpExtInst_TestFindSMsb.frag | 18 + .../extensions/OpExtInst_TestFindUMsb.frag | 18 + .../OpExtInst_TestFloatBitsToInt_lit.frag | 18 + .../OpExtInst_TestFloatBitsToUint_lit.frag | 18 + 
.../OpExtInst_TestFloorDouble_lit.frag | 18 + .../OpExtInst_TestFloorFloat_lit.frag | 18 + .../OpExtInst_TestFloorVec4Const_lit.frag | 18 + .../OpExtInst_TestFmaDouble_lit.frag | 18 + .../OpExtInst_TestFmaFloat_lit.frag | 18 + .../OpExtInst_TestFmaVec4Const_lit.frag | 18 + .../OpExtInst_TestFractDouble_lit.frag | 18 + .../OpExtInst_TestFractFloat_lit.frag | 18 + .../OpExtInst_TestFractVec4Const-lit.frag | 18 + .../OpExtInst_TestFrexpDouble_lit.frag | 18 + .../OpExtInst_TestFrexpFloat_lit.frag | 18 + .../OpExtInst_TestFrexpStructDouble_lit.frag | 18 + .../OpExtInst_TestFrexpStructFloat_lit.frag | 18 + .../OpExtInst_TestFrexpStructVec4_lit.frag | 18 + .../OpExtInst_TestIntBitsToFloat_lit.frag | 18 + ..._TestInterpolateAtCentroidNoPersp_lit.frag | 18 + ...ExtInst_TestInterpolateAtCentroid_lit.frag | 18 + ...OpExtInst_TestInterpolateAtOffset_lit.frag | 18 + ...OpExtInst_TestInterpolateAtSample_lit.frag | 18 + ...pExtInst_TestInterpolateDynIdx1DArray.frag | 18 + ..._TestInterpolateDynIdx1DArrayInStruct.frag | 18 + ...st_TestInterpolateDynIdx1DStructArray.frag | 18 + ..._TestInterpolateDynIdx2DArrayInStruct.frag | 18 + ...terpolateDynIdx2DArrayInStructInArray.frag | 18 + ...st_TestInterpolateDynIdx2DStructArray.frag | 18 + ...pExtInst_TestInterpolateDynIdx3DArray.frag | 18 + ...OpExtInst_TestInterpolateDynIdxVector.frag | 18 + .../OpExtInst_TestInverseMat4_lit.frag | 18 + .../OpExtInst_TestInverseSqrtDouble_lit.frag | 18 + .../OpExtInst_TestInverseSqrtFloat_lit.frag | 18 + ...pExtInst_TestInverseSqrtVec4Const_lit.frag | 18 + .../OpExtInst_TestLdexpDouble_lit.frag | 18 + .../OpExtInst_TestLdexpFloat_lit.frag | 18 + .../OpExtInst_TestLdexpVec4_lit.frag | 18 + .../OpExtInst_TestLengthBasic_lit.frag | 18 + .../OpExtInst_TestLengthDouble_lit.frag | 18 + .../OpExtInst_TestLengthFloat_lit.frag | 18 + .../OpExtInst_TestLengthVec4_lit.frag | 18 + .../OpExtInst_TestLog2Vec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestLog2_lit.frag | 18 + 
.../OpExtInst_TestLogVec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestLog_lit.frag | 18 + .../OpExtInst_TestMatrixInverseDmat_lit.frag | 18 + .../OpExtInst_TestMatrixInverseMat_lit.frag | 18 + .../OpExtInst_TestMaxBasic_lit.frag | 18 + .../OpExtInst_TestMaxDouble_lit.frag | 18 + .../OpExtInst_TestMaxFloat_lit.frag | 18 + .../extensions/OpExtInst_TestMaxInt_lit.frag | 18 + .../extensions/OpExtInst_TestMaxUint_lit.frag | 18 + .../OpExtInst_TestMinBasic_lit.frag | 18 + .../OpExtInst_TestMinDouble_lit.frag | 18 + .../OpExtInst_TestMinFloat_lit.frag | 18 + .../extensions/OpExtInst_TestMinInt_lit.frag | 18 + .../extensions/OpExtInst_TestMinUint_lit.frag | 18 + .../OpExtInst_TestMixBasic_lit.frag | 18 + ...pExtInst_TestMixLinearBlendDouble_lit.frag | 18 + ...OpExtInst_TestMixLinearBlendFloat_lit.frag | 18 + .../OpExtInst_TestMixSelectDouble_lit.frag | 18 + .../OpExtInst_TestMixSelectFloat_lit.frag | 18 + .../OpExtInst_TestMixSelectInt_lit.frag | 18 + .../OpExtInst_TestMixSelectUint_lit.frag | 18 + .../OpExtInst_TestModfDouble_lit.frag | 18 + .../OpExtInst_TestModfFloat_lit.frag | 18 + .../OpExtInst_TestModfVec4_lit.frag | 18 + .../OpExtInst_TestNonSemanticInfo.spvasm | 18 + .../OpExtInst_TestNormalizeDouble_lit.frag | 18 + .../OpExtInst_TestNormalizeFloat_lit.frag | 18 + .../OpExtInst_TestNormalizeVec4_lit.frag | 18 + .../OpExtInst_TestPackDouble2x32_lit.frag | 18 + .../OpExtInst_TestPackHalf2x16_lit.frag | 18 + .../OpExtInst_TestPackSnorm2x16_lit.frag | 18 + .../OpExtInst_TestPackSnorm4x8_lit.frag | 18 + .../OpExtInst_TestPackUnorm2x16_lit.frag | 18 + .../OpExtInst_TestPackUnorm4x8_lit.frag | 18 + .../extensions/OpExtInst_TestPow2_lit.frag | 18 + .../OpExtInst_TestPowVec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestPow_lit.frag | 18 + .../OpExtInst_TestRadiansVec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestRadians_lit.frag | 18 + .../OpExtInst_TestReflectDouble_lit.frag | 18 + .../OpExtInst_TestReflectFloat_lit.frag | 18 + 
.../OpExtInst_TestReflectVec4_lit.frag | 18 + .../OpExtInst_TestRefractDouble_lit.frag | 18 + .../OpExtInst_TestRefractFloat_lit.frag | 18 + .../OpExtInst_TestRefractVec4_lit.frag | 18 + .../OpExtInst_TestRoundDouble_lit.frag | 18 + .../OpExtInst_TestRoundEvenDouble_lit.frag | 18 + .../OpExtInst_TestRoundEvenFloat_lit.frag | 18 + .../OpExtInst_TestRoundEvenVec4_lit.frag | 18 + .../OpExtInst_TestRoundFloat_lit.frag | 18 + .../OpExtInst_TestRoundVec4_lit.frag | 18 + .../OpExtInst_TestSignDouble_lit.frag | 18 + .../OpExtInst_TestSignFloat_lit.frag | 18 + .../extensions/OpExtInst_TestSignInt_lit.frag | 18 + .../OpExtInst_TestSignIvec4_lit.frag | 18 + .../OpExtInst_TestSignVec4_lit.frag | 18 + .../OpExtInst_TestSinVec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestSin_lit.frag | 18 + .../OpExtInst_TestSinhFloat_lit.frag | 18 + .../extensions/OpExtInst_TestSinh_lit.frag | 18 + .../OpExtInst_TestSmoothStepDouble_lit.frag | 18 + .../OpExtInst_TestSmoothStepFloat_lit.frag | 18 + ...OpExtInst_TestSmoothStepVec4Const_lit.frag | 18 + .../OpExtInst_TestSqrtDouble_lit.frag | 18 + .../OpExtInst_TestSqrtFloat_lit.frag | 18 + .../OpExtInst_TestSqrtVec4Const_lit.frag | 18 + .../OpExtInst_TestStepDouble_lit.frag | 18 + .../OpExtInst_TestStepFloat_lit.frag | 18 + .../OpExtInst_TestStepVec4Const_lit.frag | 18 + .../OpExtInst_TestTanVec4Const_lit.frag | 18 + .../extensions/OpExtInst_TestTan_lit.frag | 18 + .../OpExtInst_TestTanhFloat_lit.frag | 18 + .../extensions/OpExtInst_TestTanh_lit.frag | 18 + .../OpExtInst_TestTruncDouble_lit.frag | 18 + .../OpExtInst_TestTruncFloat_lit.frag | 18 + .../OpExtInst_TestTruncVec4_lit.frag | 18 + .../OpExtInst_TestUintBitsToFloat_lit.frag | 18 + .../OpExtInst_TestUnpackDouble2x32_lit.frag | 18 + .../OpExtInst_TestUnpackHalf2x16_lit.frag | 18 + .../OpExtInst_TestUnpackSnorm2x16_lit.frag | 18 + .../OpExtInst_TestUnpackSnorm4x8_lit.frag | 18 + .../OpExtInst_TestUnpackUnorm2x16_lit.frag | 18 + .../OpExtInst_TestUnpackUnorm4x8_lit.frag | 18 + 
.../OpExtInst_TestinverseMat2-lit.frag | 18 + .../PipelineVsFs_TestAlpha2Coverage.pipe | 18 + ...neVsFs_ViewIndexWithMultiViewDisabled.pipe | 18 + .../GraphicsFuzz_ComputeBlockPressure.spvasm | 18 + ...GraphicsFuzz_FindKillUseAfterPoison.spvasm | 18 + .../fuzzer/GraphicsFuzz_ISelAlignment.spvasm | 18 + ...tAccessChainIndexConvertedFromFloat.spvasm | 18 + ...zz_TestAccessChainUsingInputPointer.spvasm | 18 + .../fuzzer/GraphicsFuzz_TestBVec4.spvasm | 18 + ...phicsFuzz_TestConditionalsAndOpKill.spvasm | 18 + ...phicsFuzz_TestControlFlowInFunction.spvasm | 18 + .../GraphicsFuzz_TestLoopDeepIfLoop.spvasm | 18 + .../GraphicsFuzz_TestLoopNestedIfs.spvasm | 18 + ...phicsFuzz_TestLoopsIfsContinuesCall.spvasm | 18 + ...csFuzz_TestMaxMixConditionalDiscard.spvasm | 18 + .../GraphicsFuzz_TestModFGlColor.spvasm | 18 + .../GraphicsFuzz_TestModFTempColor.spvasm | 18 + .../GraphicsFuzz_TestOpCopyObject.spvasm | 18 + ...uzz_TestOpCopyObjectFromAccessChain.spvasm | 18 + .../GraphicsFuzz_TestOpIAddCarry.spvasm | 18 + ...GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm | 18 + .../fuzzer/GraphicsFuzz_TestOpSNegate.spvasm | 18 + .../GraphicsFuzz_TestSimilarNestedIfs.spvasm | 18 + .../fuzzer/GraphicsFuzz_TestSwitch.spvasm | 18 + .../GraphicsFuzz_TestTwoLoopsMatrix.spvasm | 18 + .../GraphicsFuzz_TestTwoLoopsSetStruct.spvasm | 18 + .../GraphicsFuzz_TestTwoLoopsWithBreak.spvasm | 18 + ...csFuzz_UseNotJointlyDominatedByDefs.spvasm | 18 + .../general/AggressiveInvariantLoads.pipe | 18 + .../CallInstAsUserOfGlobalVariable.spvasm | 18 + .../general/CantOptimizePointSizeWrite.pipe | 18 + .../CbShaderMaskWithDummyExport.spvasm | 18 + .../general/CbShaderMaskWithDummyExport4.pipe | 18 + llpc/test/shaderdb/general/CoherentArray.frag | 18 + .../test/shaderdb/general/CoherentVector.frag | 18 + .../shaderdb/general/CsPipelineDumpTest.pipe | 18 + .../shaderdb/general/CsTimerProfileTest.pipe | 18 + .../general/DisableInvariantLoads.pipe | 18 + .../DiscardToDemoteTransformations.frag | 18 + 
...ardToDemoteTransformationsNotRequired.frag | 18 + .../general/MeshOutputsToAllocas.mesh | 25 +- .../general/MissingResourceNodeTest.pipe | 18 + .../shaderdb/general/NggInCullingMode.pipe | 18 + llpc/test/shaderdb/general/NsaThreshold.pipe | 18 + .../general/OptimizePointSizeWrite.pipe | 18 + .../shaderdb/general/OutputPrimitiveTest.geom | 25 +- .../PipelineCs_DebugBreak_intrinsic.pipe | 18 + .../general/PipelineCs_DebugPrintf.pipe | 18 + .../PipelineCs_ForceMemoryBarrierScope.pipe | 18 + .../PipelineCs_LdsSpillLimitDwordsOption.pipe | 18 + .../PipelineCs_MultipleRootInlineBuffer.pipe | 18 + ..._OverrideShaderThreadGroupSize16X16X1.pipe | 18 + ...Cs_OverrideShaderThreadGroupSize8X8X1.pipe | 18 + .../PipelineCs_TestConstImmediateStore.pipe | 18 + .../PipelineCs_TestDynDescNoSpill.pipe | 18 + .../PipelineCs_TestDynDescNoSpill_lit.pipe | 18 + .../general/PipelineCs_TestDynDescSpill.pipe | 18 + .../PipelineCs_TestFetch2DMSFmaskBased.pipe | 18 + ...ipelineCs_TestFetch2DMSFmaskBased_lit.pipe | 18 + .../PipelineCs_TestFetch2DMSFmaskOnly.pipe | 18 + .../PipelineCs_TestInlineConstDirect.pipe | 18 + .../PipelineCs_TestInlineConstDirect_lit.pipe | 18 + .../PipelineCs_TestInlineConstIndirect.pipe | 18 + ...ipelineCs_TestInlineConstIndirect_lit.pipe | 18 + .../PipelineCs_TestMultiEntryPoint.pipe | 18 + .../PipelineCs_TestMultiEntryPoint_lit.pipe | 18 + .../PipelineGsTess_TestInOutPacking.pipe | 18 + .../PipelineGs_TestViewIndexAndLayer.pipe | 18 + .../PipelineMesh_OutputPackingInLds.pipe | 18 + ...Mesh_TestMismatchMeshInOutWithAllocas.pipe | 18 + .../PipelineRays_TestLgcRtTraceRayOp.pipe | 18 + .../PipelineTaskMesh_LdsVariables.pipe | 18 + ...Tes_OutputComponentNotReadByNextStage.pipe | 18 + ...elineTcsTes_TestLocMapArrayElemAccess.pipe | 18 + ...neTcsTes_TestLocMapArrayElemDynAccess.pipe | 18 + ...ineTcsTes_TestLocMapLoadBuiltInOutput.pipe | 18 + ...ineTcsTes_TestLocMapLoadGenericOutput.pipe | 18 + ...ipelineTcsTes_TestLocMapVecCompAccess.pipe | 18 + 
...lineTcsTes_TestLocMapVecCompDynAccess.pipe | 18 + ...csTes_TestTessLevelDynIndexForIsoline.pipe | 18 + ...neTcsTes_TestTessLevelDynIndexForQuad.pipe | 18 + ...sTes_TestTessLevelDynIndexForTriangle.pipe | 18 + ...ineTcsTes_TestTessLevelElemForIsoline.pipe | 18 + ...pelineTcsTes_TestTessLevelElemForQuad.pipe | 18 + ...neTcsTes_TestTessLevelElemForTriangle.pipe | 18 + ...ipelineTcsTes_TestTessLevelForIsoline.pipe | 18 + .../PipelineTcsTes_TestTessLevelForQuad.pipe | 18 + ...pelineTcsTes_TestTessLevelForTriangle.pipe | 18 + .../PipelineTess_TestInOutPacking.pipe | 18 + .../PipelineTess_XfbWithManyComponents.pipe | 18 + .../PipelineVsFs_ColorExportShader.pipe | 18 + .../general/PipelineVsFs_DisableFMA.pipe | 18 + .../PipelineVsFs_DynamicSampleInfo.pipe | 18 + .../general/PipelineVsFs_FsWithData.pipe | 18 + .../general/PipelineVsFs_GlPositionFMF.pipe | 18 + .../PipelineVsFs_MultiTableDescSet.pipe | 18 + .../PipelineVsFs_NullFragmentShader.pipe | 18 + .../PipelineVsFs_PixelShaderSamplesZero.pipe | 18 + ...ipelineVsFs_TestBarycentric_line_list.pipe | 18 + .../PipelineVsFs_TestBarycentric_tri_fan.pipe | 18 + ...PipelineVsFs_TestBarycentric_tri_list.pipe | 18 + .../PipelineVsFs_TestColorFormat_A8.pipe | 18 + .../PipelineVsFs_TestConstImmediateStore.pipe | 18 + .../PipelineVsFs_TestDualSourceBlend.pipe | 18 + ...s_TestDualSourceBlend_onlyOneRTExport.pipe | 18 + ...pelineVsFs_TestExpWithRGB_UINT_PACK32.pipe | 18 + ...estIgnoreDynamicDualSourceBlendEnable.pipe | 18 + .../PipelineVsFs_TestInOutPacking.pipe | 18 + .../PipelineVsFs_TestIncludeLlvmIr.pipe | 18 + ...pelineVsFs_TestIndirectResourceLayout.pipe | 18 + ...eVsFs_TestInterpAtCentriodBarycentric.pipe | 18 + .../general/PipelineVsFs_TestNullFs.pipe | 18 + .../PipelineVsFs_TestPervertexVariable.pipe | 18 + .../PipelineVsFs_TestPointerInOut.pipe | 18 + .../PipelineVsFs_TestPrimitiveID_First.pipe | 18 + .../PipelineVsFs_TestPrimitiveID_Last.pipe | 18 + ...pelineVsFs_TestSubpassInputFmaskBased.pipe | 18 + 
.../general/PipelineVsFs_TestUberShader.pipe | 18 + .../PipelineVsFs_TestVertexDivisor.pipe | 18 + .../PipelineVsFs_TestVertexFetchWithR8G8.pipe | 18 + .../PipelineVsFs_TestViewportIndex.pipe | 18 + .../PipelineVsFs_Test_unused_outputs.pipe | 18 + .../general/PipelineVsFs_VsAndFsWithData.pipe | 18 + .../general/PipelineVsFs_VsWithData.pipe | 18 + .../PipelineVsGsFs_TestDwordPacking.pipe | 18 + .../general/PipelineVsGsFs_TestMergeNode.pipe | 18 + .../general/PipelineVsGs_TestBasicInOut.pipe | 18 + .../PipelineVsGs_TestBuiltinInOut.pipe | 18 + .../PipelineVsPs_TestFetchRGB10A2.pipe | 18 + .../shaderdb/general/PrintOptionsTest.spvasm | 18 + .../ScalarBlockLayoutOptionTest.spvasm | 18 + .../shaderdb/general/ScheduleStrategy.pipe | 18 + .../TessInOutWithReadBackOnlyOutputs.pipe | 18 + ...estCombineOfMultipleStoreInstructions.frag | 18 + ...ompilationOfNestedStructTaskPayload.spvasm | 18 + .../general/TestComponentIndexing.tese | 25 +- .../TestConstantImmStore_FunctionInline.frag | 18 + .../general/TestDeduplicateConstTables.frag | 18 + .../general/TestDeduplicateConstTables.spvasm | 18 + ...tNumComponentsWithReversedAccessOrder.mesh | 25 +- .../general/TestWorkgroupMemoryLayout.spvasm | 18 + .../shaderdb/general/UndefVertexOutput.spvasm | 18 + .../VertexOptimizationLevelTest.spvasm | 18 + .../general/VertexPipelineDumpTest.spvasm | 18 + .../general/VertexTimerProfileTest.spvasm | 18 + .../general/VsFsPipelineDumpTest.pipe | 18 + .../general/VsFsTimerProfileTest.pipe | 18 + .../WorkaroundStorageImageFormats.pipe | 18 + .../general/WorkgroupSizeLiteral.spvasm | 18 + llpc/test/shaderdb/general/outputArray.frag | 18 + .../gfx10/CheckFMFOptions_NoContract.pipe | 18 + .../gfx10/PipelineMergeAttributes_GsVs.pipe | 18 + .../PipelineMergeAttributes_GsVsNgg.pipe | 18 + .../gfx10/PipelineMergeAttributes_HsLs.pipe | 18 + .../PipelineVsFs_TestFetchSingleInputNgg.pipe | 18 + ...ineVsFs_TestSubgroupSizeUsageFragment.pipe | 18 + ...elineVsFs_TestSubgroupSizeUsageVertex.pipe | 18 + 
.../PipelineVsFs_TestVsOutMiscSideBusEna.pipe | 18 + llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe | 18 + llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe | 18 + .../shaderdb/gfx11/AttributePrecedesPos.pipe | 18 + llpc/test/shaderdb/gfx11/FlatParamDpp.frag | 18 + llpc/test/shaderdb/gfx11/HalfAttribute.frag | 18 + .../shaderdb/gfx11/SgprUserDataInit_Cs.pipe | 18 + .../shaderdb/gfx11/SgprUserDataInit_Fs.pipe | 18 + .../gfx11/TessFactorStoreWithOpt.pipe | 18 + .../gfx11/TestGdsOperationsForXfb.vert | 18 + .../shaderdb/gfx11/TestGsXfbWithHole.pipe | 18 + .../cooperativeMatrix/extract-insert.spvasm | 18 + .../PipelineRays_TestStaticCompile.pipe | 18 + .../shaderdb/gfx12/DynamicVGPRDisabled.pipe | 175 ++++ .../shaderdb/gfx12/DynamicVGPREnabled.pipe | 185 ++++ .../shaderdb/gfx12/DynamicVGPREnabled32.pipe | 180 ++++ .../gfx12/DynamicVgprWithContinufy.pipe | 176 ++++ .../shaderdb/gfx12/Float16Dot2WithRTE.spvasm | 109 +++ .../shaderdb/gfx12/Float16Dot2WithRTZ.spvasm | 109 +++ .../gfx12/Float16Dot2WithSignedZeros.spvasm | 129 +++ llpc/test/shaderdb/gfx12/ImageAtomicFAdd.vert | 45 + .../shaderdb/gfx12/ImageAtomicFMinMax.vert | 48 ++ ...PipelineCs_16BitSBufferLoadConversion.pipe | 92 ++ ...elineRays_SetAutoCompileContinuations.pipe | 175 ++++ .../shaderdb/gfx12/SubgroupQuadBroadcast.frag | 81 ++ .../shaderdb/gfx12/TuningTemporalHints.pipe | 135 +++ .../shaderdb/gfx12/WorkgroupRoundRobin.pipe | 96 +++ llpc/test/shaderdb/gfx12/lit.local.cfg | 36 + .../PipelineLibCes_TestColorExport.pipe | 18 + .../PipelineLibFs_TestFsLibrary.pipe | 18 + .../PipelineLibVs_TestVsLibrary.pipe | 18 + .../PipelineVsFs_TestGraphicsLibrary.pipe | 18 + .../hlsl/Hlsl_TestCBufferArrayPacking.spvasm | 18 + ...l_TestCBufferArrayPackingFullStruct.spvasm | 18 + ...Hlsl_TestLoadRowMajorMatrixInStruct.spvasm | 18 + ...lsl_TestStoreRowMajorMatrixInStruct.spvasm | 18 + .../hlsl/Hlsl_TestStructuredBuffers.spvasm | 18 + .../SpirvTwoEntryPoints.spvasm | 18 + .../multiple_inputs/test_inputs/Fs1.frag | 18 + 
.../multiple_inputs/test_inputs/Fs2.frag | 18 + .../PipelineVsFs_ConstantData_Vs1Fs1.pipe | 18 + .../PipelineVsFs_ConstantData_Vs1Fs2.pipe | 18 + .../PipelineVsFs_ConstantData_Vs2Fs1.pipe | 18 + .../multiple_inputs/test_inputs/Vs1.vert | 18 + .../multiple_inputs/test_inputs/Vs2.vert | 18 + .../object/ObjConstant_TestArray_lit.frag | 18 + .../object/ObjConstant_TestElementReuse.frag | 18 + .../object/ObjConstant_TestMatrix_lit.frag | 18 + ...onstant_TestSpecConstantArraySize_lit.vert | 18 + .../ObjConstant_TestSpecConstantOp_lit.vert | 18 + ...Constant_TestSpecialSourceSwizzle_lit.frag | 18 + .../object/ObjConstant_TestStruct_lit.frag | 18 + .../object/ObjFragMask_TestFragFetch_lit.frag | 18 + .../ObjImage_TestMemoryQualifier_lit.frag | 18 + .../object/ObjInput_TestDrawParams_lit.vert | 18 + .../object/ObjInput_TestFsBasic_lit.frag | 18 + .../object/ObjInput_TestFsBuiltIn_lit.frag | 18 + .../ObjInput_TestFsCompSpecifier_lit.frag | 18 + .../object/ObjInput_TestFsDouble_lit.frag | 18 + .../object/ObjInput_TestFsInBlock_lit.frag | 18 + ...nput_TestFsInterpQualifierInBlock_lit.frag | 18 + ...put_TestFsInterpQualifierOnStruct_lit.frag | 18 + .../ObjInput_TestFsInterpQualifier_lit.frag | 18 + .../ObjInput_TestFsMatrixArray_lit.frag | 18 + .../object/ObjInput_TestFsMatrix_lit.frag | 18 + ...t_TestFsNonVolatileHelperInvocation.spvasm | 18 + .../object/ObjInput_TestFsStruct_lit.frag | 18 + .../ObjInput_TestFsVectorArray_lit.frag | 18 + ...nput_TestFsVolatileHelperInvocation.spvasm | 18 + .../object/ObjInput_TestGsBasic_lit.geom | 25 +- .../object/ObjInput_TestGsBuiltIn_lit.geom | 25 +- .../ObjInput_TestGsCompSpecifier_lit.geom | 25 +- ...ut_TestIndexingInterpOfInputArray_lit.frag | 18 + .../object/ObjInput_TestTcsBasic_lit.tesc | 25 +- .../object/ObjInput_TestTcsBuiltIn_lit.tesc | 25 +- .../ObjInput_TestTcsCompSpecifier_lit.tesc | 25 +- ...put_TestTcsLoadEntireInputArray_lit.spvasm | 18 + .../object/ObjInput_TestTcsViewIndex.spvasm | 18 + 
.../object/ObjInput_TestTesBasic_lit.tese | 25 +- .../object/ObjInput_TestTesBuiltIn_lit.tese | 25 +- .../ObjInput_TestTesCompSpecifier_lit.tese | 25 +- .../ObjInput_TestTesComplexInBlock_lit.tese | 25 +- ...jInput_TestTesComplexPatchInBlock_lit.tese | 25 +- .../object/ObjInput_TestTesConstExpr_lit.tese | 25 +- .../object/ObjInput_TestVsBasic_lit.vert | 18 + .../object/ObjInput_TestVsBuiltIn_lit.vert | 18 + .../ObjInput_TestVsCompSpecifier_lit.vert | 18 + .../object/ObjInput_TestVsDouble_lit.vert | 18 + .../object/ObjInput_TestVsMatrix.vert | 18 + .../object/ObjInput_TestVsMatrixArray.vert | 18 + .../ObjInput_TestVsVectorArray_lit.vert | 18 + .../object/ObjNonUniform_TestImageSample.frag | 18 + .../ObjNonUniform_TestMinNonUniform.spvasm | 18 + .../object/ObjOutput_TestFsBasic_lit.frag | 18 + .../object/ObjOutput_TestFsBuiltIn_lit.frag | 18 + .../ObjOutput_TestFsCompSpecifier_lit.frag | 18 + .../object/ObjOutput_TestFsNoOut.frag | 18 + .../object/ObjOutput_TestFsVector_lit.frag | 18 + .../object/ObjOutput_TestGsBasic_lit.geom | 25 +- .../object/ObjOutput_TestGsBuiltIn_lit.geom | 25 +- .../ObjOutput_TestGsCompSpecifier_lit.geom | 25 +- .../object/ObjOutput_TestLlpcOpt.frag | 18 + .../shaderdb/object/ObjOutput_TestOpt.frag | 18 + .../object/ObjOutput_TestTcsBasic_lit.tesc | 25 +- .../object/ObjOutput_TestTcsBuiltIn_lit.tesc | 25 +- .../ObjOutput_TestTcsCompSpecifier_lit.tesc | 25 +- .../ObjOutput_TestTcsComplexOutBlock_lit.tesc | 25 +- ...utput_TestTcsComplexPatchOutBlock_lit.tesc | 25 +- .../ObjOutput_TestTcsConstExpr_lit.tesc | 25 +- .../object/ObjOutput_TestTesBasic_lit.tese | 25 +- .../object/ObjOutput_TestTesBuiltIn_lit.tese | 25 +- .../ObjOutput_TestTesCompSpecifier_lit.tese | 25 +- .../object/ObjOutput_TestVsBasic_lit.vert | 18 + .../object/ObjOutput_TestVsBuiltIn_lit.vert | 18 + .../ObjOutput_TestVsCompSpecifier_lit.vert | 18 + .../object/ObjOutput_TestVsDouble_lit.vert | 18 + .../object/ObjOutput_TestVsMatrix.vert | 18 + 
.../object/ObjOutput_TestVsMatrixArray.vert | 18 + .../object/ObjOutput_TestVsNoBuiltIn_lit.vert | 18 + .../object/ObjOutput_TestVsNoGeneric_lit.vert | 18 + .../object/ObjOutput_TestVsOutBlock_lit.vert | 18 + .../object/ObjOutput_TestVsStruct_lit.vert | 18 + .../ObjOutput_TestVsVectorArray_lit.vert | 18 + .../object/ObjPushConst_TestBasic_lit.vert | 18 + .../ObjPushConst_TestNestedStruct_lit.vert | 18 + .../ObjPushConst_TestSpillToMemory_lit.vert | 18 + .../object/ObjPushConstant_TestBasic_lit.frag | 18 + ...hConstant_TestMultiPushConstant_lit.spvasm | 18 + .../object/ObjResource_TestAlias_lit.frag | 18 + .../object/ObjSampler_TestLargeId_lit.frag | 18 + ...Sampler_TestSeparateSamplerShadow_lit.frag | 18 + .../object/ObjStorageBlock_TestAlign_lit.frag | 18 + .../ObjStorageBlock_TestDirectIndex_lit.frag | 18 + .../ObjStorageBlock_TestDouble_lit.frag | 18 + ...ObjStorageBlock_TestIndirectIndex_lit.frag | 18 + ...bjStorageBlock_TestMatrixInStruct_lit.vert | 18 + ...jStorageBlock_TestMemoryQualifier_lit.frag | 18 + ...geBlock_TestMultiLevelAccessChain_lit.vert | 18 + .../ObjStorageBlock_TestOffset_lit.frag | 18 + .../ObjStorageBlock_TestRowMajor_lit.frag | 18 + .../ObjStorageBlock_TestRuntimeArray_lit.vert | 18 + ...StorageBlock_TestStoreBasicDouble_lit.vert | 18 + ...jStorageBlock_TestStoreBasicFloat_lit.vert | 18 + ...ObjStorageBlock_TestStoreBasicInt_lit.vert | 18 + ...bjStorageBlock_TestStoreBasicUint_lit.vert | 18 + ...StorageBlock_TestStoreMatrixArray_lit.vert | 18 + .../ObjStorageBlock_TestStoreMatrix_lit.vert | 18 + ...geBlock_TestStoreMixedMatrixStyle_lit.frag | 18 + ...rageBlock_TestStoreRowMajorMatrix_lit.frag | 18 + ...StorageBlock_TestStoreScalarArray_lit.vert | 18 + .../ObjStorageBlock_TestStoreStruct_lit.vert | 18 + ...orageBlock_TestStoreToMatrixArray_lit.vert | 18 + ...ObjStorageBlock_TestStoreToMatrix_lit.vert | 18 + ...geBlock_TestStoreToRowMajorMatrix_lit.frag | 18 + ...lock_TestStoreToScalarVectorArray_lit.vert | 18 + 
...bjStorageBlock_TestStoreVectorArray.spvasm | 18 + ...StorageBlock_TestStoreVectorArray_lit.vert | 18 + ...StorageBlock_TestUseStorageBuffer_lit.frag | 18 + .../object/ObjUniformBlock_TestAlign_lit.frag | 18 + .../ObjUniformBlock_TestDirectIndex_lit.frag | 18 + ...ObjUniformBlock_TestIndirectIndex_lit.frag | 18 + ...jUniformBlock_TestLoadBasicDouble_lit.vert | 18 + ...bjUniformBlock_TestLoadBasicFloat_lit.vert | 18 + .../ObjUniformBlock_TestLoadBasicInt_lit.vert | 18 + ...ObjUniformBlock_TestLoadBasicUint_lit.vert | 18 + ...formBlock_TestLoadFromMatrixArray_lit.vert | 18 + ...bjUniformBlock_TestLoadFromMatrix_lit.vert | 18 + ...mBlock_TestLoadFromRowMajorMatrix_lit.frag | 18 + ...ock_TestLoadFromScalarVectorArray_lit.vert | 18 + ...jUniformBlock_TestLoadMatrixArray_lit.vert | 18 + .../ObjUniformBlock_TestLoadMatrix_lit.vert | 18 + ...ormBlock_TestLoadMixedMatrixStyle_lit.frag | 18 + ...UniformBlock_TestLoadNestedStruct_lit.vert | 18 + ...iformBlock_TestLoadRowMajorMatrix_lit.frag | 18 + ...jUniformBlock_TestLoadScalarArray_lit.vert | 18 + .../ObjUniformBlock_TestLoadStruct_lit.vert | 18 + ...jUniformBlock_TestLoadVectorArray_lit.vert | 18 + .../ObjUniformBlock_TestOffset_lit.frag | 18 + .../ObjUniformConstant_TestArray_lit.frag | 18 + .../ObjUniformConstant_TestSimple_lit.frag | 18 + .../shaderdb/object/ObjXfb_TestBasic_lit.vert | 18 + .../ray_tracing/PipelineRayquery.pipe | 18 + .../PipelineRays_Continuations.pipe | 18 + ...inuations_IntersectionShaderVgprCount.pipe | 18 + ...Continuations_SpecializeDriverShaders.pipe | 18 + ...inuations_SpecializeDriverShaders_Isa.pipe | 18 + .../PipelineRays_DifferentPayloads.pipe | 18 + .../ray_tracing/PipelineRays_Irreducible.pipe | 18 + .../ray_tracing/PipelineRays_NoPayload.pipe | 18 + ...neRays_SetContinuationsCompileUnified.pipe | 18 + .../PipelineRays_TestLaunchKernel.pipe | 18 + .../PipelineRays_TestLibraryNoTraversal.pipe | 18 + ...eRays_TestRtIgnoreDeclaredPayloadSize.pipe | 18 + 
.../PipelineRays_TestStaticCompile.pipe | 18 + .../PipelineRays_TestWaveSize.pipe | 18 + .../shaderdb/ray_tracing/TestContState.rchit | 18 + .../shaderdb/ray_tracing/TestContState.rgen | 18 + .../ray_tracing/TestHitAttribute.rint | 18 + .../ray_tracing/TestKnownRayFlags.pipe | 18 + .../ray_tracing/TestPayloadSizes.rgen | 18 + .../ray_tracing/TestProcessGpuRtLibrary.rgen | 18 + .../shaderdb/ray_tracing/standalone.rahit | 18 + .../shaderdb/ray_tracing/standalone.rcall | 18 + .../shaderdb/ray_tracing/standalone.rchit | 18 + .../test/shaderdb/ray_tracing/standalone.rgen | 18 + .../test/shaderdb/ray_tracing/standalone.rint | 18 + .../DescPtrSingleSelect.spvasm | 18 + .../PipelineCs_PipelineCacheHit.pipe | 18 + .../PipelineCs_TestUnsupportedShader.pipe | 18 + .../PipelineGs_BasicRelocGsTest.pipe | 18 + ...elineTess_RelocRemoveUnusedTcsOutputs.pipe | 18 + ...ipelineVsFs_CheckFloatModeFlushToZero.pipe | 18 + .../PipelineVsFs_CheckFloatModePreserve.pipe | 18 + .../PipelineVsFs_EnableColorExport.pipe | 18 + .../PipelineVsFs_FillPsInput.pipe | 18 + .../PipelineVsFs_ImmutableSampler.pipe | 18 + .../PipelineVsFs_MultiDwordPushConst.pipe | 18 + .../PipelineVsFs_MultipleConstData.pipe | 18 + .../PipelineVsFs_PipelineCacheHit.pipe | 18 + .../PipelineVsFs_RelocCheckPsInControl.pipe | 18 + .../PipelineVsFs_RelocMultiView.pipe | 18 + .../PipelineVsFs_ShadowDescTable.pipe | 18 + ...elineVsFs_ShadowDescTableMissingFmask.pipe | 18 + ...elineVsFs_TestRelocatableInOutMapping.pipe | 18 + ...Fs_TestRelocatableSeparateCompilation.pipe | 18 + .../PipelineVsPs_BuiltinExportInPrologue.pipe | 18 + .../PipelineVsPs_PsInput.pipe | 18 + ...ngleVs_CheckNoteSectionForCacheHash.spvasm | 18 + .../relocatable_shaders/VsGs_Reloc.spvasm | 18 + .../UpdateTestChecks/Inputs/base_test.pipe | 25 +- .../Inputs/check_pal_metadata.pipe | 25 +- .../Inputs/stable_ir_values.pipe | 25 +- llpc/translator/lib/SPIRV/SPIRVInternal.h | 5 +- llpc/translator/lib/SPIRV/SPIRVReader.cpp | 62 ++ 
.../translator/lib/SPIRV/libSPIRV/SPIRVEnum.h | 4 +- .../lib/SPIRV/libSPIRV/SPIRVInstruction.h | 3 + .../lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 4 +- .../lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 2 + .../lib/SPIRV/libSPIRV/SPIRVType.cpp | 8 +- .../translator/lib/SPIRV/libSPIRV/SPIRVType.h | 7 +- llpc/unittests/util/testPipelineDumper.cpp | 22 + llvmraytracing/include/lgc/LgcCpsDialect.h | 4 + llvmraytracing/lib/DXILContPostProcess.cpp | 16 + llvmraytracing/lib/LgcCpsDialect.cpp | 31 + llvmraytracing/plugin/CMakeLists.txt | 2 +- sharedme/xdl/CMakeLists.txt | 7 + sharedme/xdl/include/lgc/LgcXdlDialect.td | 3 + sharedme/xdl/include/lgc/LgcXdlTypes.h | 11 + sharedme/xdl/include/xdl/util/ElementType.h | 5 + sharedme/xdl/util/ElementType.cpp | 33 + .../subgroupshuffle-index-constant.amber | 25 +- .../amber/subgroupshuffle-index-uniform.amber | 25 +- tool/dumper/vkgcPipelineDumper.cpp | 44 + tool/vfx/vfxVkSection.h | 29 + util/gpurtshim/CMakeLists.txt | 5 + util/gpurtshim/GpurtShim.cpp | 25 +- version/CMakeLists.txt | 10 + version/include/llpc/GpurtEnums.h | 6 + version/include/llpcVersion.h.in | 15 + 1287 files changed, 29999 insertions(+), 89 deletions(-) create mode 100644 lgc/lowering/AddBufferOperationMetadata.cpp create mode 100644 lgc/test/Transforms/CpsLoweringWithDvgpr/continuation-basic.lgc create mode 100644 lgc/test/Transforms/CpsLoweringWithDvgpr/cps-entry-point.lgc create mode 100644 lgc/test/Transforms/CpsLoweringWithDvgpr/cps-stack-lowering.lgc create mode 100644 lgc/test/Transforms/CpsLoweringWithDvgpr/cps-unify-exits.lgc create mode 100644 lgc/test/Transforms/CpsLoweringWithDvgpr/lit.local.cfg create mode 100644 lgc/test/shaderdb/gfx12/CsBPermuteWave64.lgc create mode 100644 lgc/test/shaderdb/gfx12/CsClusteredMultiExclusive.lgc create mode 100644 lgc/test/shaderdb/gfx12/buffer.atomic.ops.scope.lgc create mode 100644 lgc/test/shaderdb/gfx12/gfx1200wavematrix-load-wave64.lgc create mode 100644 lgc/test/shaderdb/gfx12/gfx1200wavematrix-store-wave64.lgc create 
mode 100644 lgc/test/shaderdb/gfx12/gfx1200wavematrix.lgc create mode 100644 lgc/test/shaderdb/gfx12/lit.local.cfg create mode 100644 lgc/test/shaderdb/gfx12/packed-accumulators-gfx12.lgc create mode 100644 lgc/test/shaderdb/gfx12/s_buffer_load-conversion-gfx12.lgc create mode 100644 llpc/test/shaderdb/gfx12/DynamicVGPRDisabled.pipe create mode 100644 llpc/test/shaderdb/gfx12/DynamicVGPREnabled.pipe create mode 100644 llpc/test/shaderdb/gfx12/DynamicVGPREnabled32.pipe create mode 100644 llpc/test/shaderdb/gfx12/DynamicVgprWithContinufy.pipe create mode 100644 llpc/test/shaderdb/gfx12/Float16Dot2WithRTE.spvasm create mode 100644 llpc/test/shaderdb/gfx12/Float16Dot2WithRTZ.spvasm create mode 100644 llpc/test/shaderdb/gfx12/Float16Dot2WithSignedZeros.spvasm create mode 100644 llpc/test/shaderdb/gfx12/ImageAtomicFAdd.vert create mode 100644 llpc/test/shaderdb/gfx12/ImageAtomicFMinMax.vert create mode 100644 llpc/test/shaderdb/gfx12/PipelineCs_16BitSBufferLoadConversion.pipe create mode 100644 llpc/test/shaderdb/gfx12/PipelineRays_SetAutoCompileContinuations.pipe create mode 100644 llpc/test/shaderdb/gfx12/SubgroupQuadBroadcast.frag create mode 100644 llpc/test/shaderdb/gfx12/TuningTemporalHints.pipe create mode 100644 llpc/test/shaderdb/gfx12/WorkgroupRoundRobin.pipe create mode 100644 llpc/test/shaderdb/gfx12/lit.local.cfg diff --git a/.typos.toml b/.typos.toml index d94ecdccc3..0f9c93ead6 100644 --- a/.typos.toml +++ b/.typos.toml @@ -29,3 +29,5 @@ dne = "dne" offen = "offen" varing = "varing" Derivate = "Derivate" +TESE = "TESE" +SER = "SER" diff --git a/cmake/vkgc.cmake b/cmake/vkgc.cmake index acde1624ca..07c09dd3cb 100644 --- a/cmake/vkgc.cmake +++ b/cmake/vkgc.cmake @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -38,6 +38,12 @@ if(LLPC_BUILD_GFX11) endif() #endif +#if LLPC_BUILD_GFX12 +if(LLPC_BUILD_GFX12) + target_compile_definitions(vkgc_headers INTERFACE LLPC_BUILD_GFX12) +endif() +#endif + #if LLPC_RAY_TRACING if(LLPC_RAY_TRACING) if(NOT LLPC_IS_STANDALONE) diff --git a/compilerutils/CMakeLists.txt b/compilerutils/CMakeLists.txt index f1830d0154..ead8d8b2e8 100644 --- a/compilerutils/CMakeLists.txt +++ b/compilerutils/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to diff --git a/compilerutils/plugin/CMakeLists.txt b/compilerutils/plugin/CMakeLists.txt index 2ec5a1291c..e47b6a217d 100644 --- a/compilerutils/plugin/CMakeLists.txt +++ b/compilerutils/plugin/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to diff --git a/compilerutils/test/CMakeLists.txt b/compilerutils/test/CMakeLists.txt index 3033c8910b..42f25782b9 100644 --- a/compilerutils/test/CMakeLists.txt +++ b/compilerutils/test/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to diff --git a/gfxruntime/src/shaders/AdvancedBlend.hlsl b/gfxruntime/src/shaders/AdvancedBlend.hlsl index e142a629f5..11ce83e1a9 100644 --- a/gfxruntime/src/shaders/AdvancedBlend.hlsl +++ b/gfxruntime/src/shaders/AdvancedBlend.hlsl @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 324baec4c4..1b0bb962e2 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -453,6 +453,20 @@ struct CompileTimeConst { } values; ///< The compile-time values for this slot. 
}; +#if LLPC_BUILD_GFX12 +/// Handle temporal hint +enum TemporalHintOpType { + TemporalHintAtmWrite = 0, + TemporalHintImageRead = 4, + TemporalHintImageWrite = 8, + TemporalHintTessFactorWrite = 12, + TemporalHintTessRead = 16, + TemporalHintTessWrite = 20, + TemporalHintBufferRead = 24, + TemporalHintBufferWrite = 28, +}; +#endif + /// Represents info of compile-time constants within a shader of a specified stage. struct CompileConstInfo { unsigned numCompileTimeConstants; ///< Number of compile time constants. @@ -496,7 +510,11 @@ struct PipelineOptions { bool reverseThreadGroup; ///< If set, enable thread group reversing bool internalRtShaders; ///< Whether this pipeline has internal raytracing shaders unsigned forceNonUniformResourceIndexStageMask; ///< Mask of the stage to force using non-uniform resource index. +#if LLPC_BUILD_GFX12 + bool expertSchedulingMode; +#else bool reserved16; +#endif struct GLState { bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate @@ -519,14 +537,27 @@ struct PipelineOptions { } glState; const auto &getGlState() const { return glState; } +#if LLPC_BUILD_GFX12 + unsigned cacheScopePolicyControl; ///< Control cache scope policy. attributes-through-memory read/write is + /// available. +#else unsigned reserved20; +#endif bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader. bool reserved21; bool optimizePointSizeWrite; ///< If set, the write of PointSize in the last vertex processing stage will be ///< eliminated if the write value is 1.0. CompileConstInfo *compileConstInfo; ///< Compile time constant data. +#if LLPC_BUILD_GFX12 + unsigned temporalHintControl; ///< Override value for temporal hint. A load/store occupies 4 bits. The highest bit + /// of 4 bits marks whether to override temporal hint. 
+ /// Arrange from the low bit to high bit in the following order: + /// TemporalHintAtmWrite, TemporalHintImageRead, TemporalHintImageWrite, + /// TemporalHintTessFactorWrite, TemporalHintTessRead, TemporalHintTessWrite, TemporalHintBufferRead, TemporalHintBufferWrite +#else unsigned reserved22; +#endif bool padBufferSizeToNextDword; ///< Vulkan only, set if the driver rounds the buffer size up the next dword }; @@ -776,6 +807,28 @@ inline unsigned compact32(ShaderHash hash) { /// Represent a pipeline option which can be automatic as well as explicitly set. enum InvariantLoads : unsigned { Auto = 0, EnableOptimization = 1, DisableOptimization = 2, ClearInvariants = 3 }; +#if LLPC_BUILD_GFX12 +/// Control cache policy: whether to use LLC (last level cache, aka set noAlloc). +struct CachePolicyLlc { + union NoAllocResource { + struct { + unsigned set : 5; ///< Resource set + unsigned binding : 16; ///< Resource binding + unsigned noAlloc : 1; ///< llc_noAlloc policy + unsigned : 10; + }; + struct { + unsigned resourceId : 21; ///< Combined resource ID (overlays the set and binding bits) + unsigned : 11; + }; + unsigned u32All; + }; + + const unsigned *noAllocs; // Set for each resource. + unsigned resourceCount; // The count of resources +}; +#endif + /// Represents per shader stage options. struct PipelineShaderOptions { ShaderHash clientHash; ///< Client-supplied unique shader hash. A value of zero indicates that LLPC should @@ -918,6 +971,10 @@ struct PipelineShaderOptions { /// Application workaround: forward propagate NoContraction decoration to any related FAdd operation. bool forwardPropagateNoContract; +#if LLPC_BUILD_GFX12 + /// Enable round-robin mode for waves in workgroup. + bool workgroupRoundRobin; +#endif /// Binding ID offset of default uniform block unsigned constantBufferBindingOffset; @@ -931,6 +988,15 @@ struct PipelineShaderOptions { /// will be assigned values as if they were decorated as DeviceIndex. 
bool viewIndexFromDeviceIndex; +#if LLPC_BUILD_GFX12 + /// Control LLC cache policy + CachePolicyLlc cachePolicyLlc; + + /// Override value for temporal hint. A load/store occupies 4 bits. The highest bit of 4 bits marks whether to + /// override temporal hint. + unsigned temporalHintShaderControl; +#endif + /// Indicate whether the vertex shader is used by transform pipeline bool enableTransformShader; @@ -1471,13 +1537,18 @@ struct RayTracingPipelineBuildInfo { unsigned pipelineLibStageMask; ///< Pipeline library stage mask //@} - unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize - unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize - bool isReplay; ///< Pipeline is created for replaying - const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be - /// stored inside the ELF - size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data - unsigned cpsFlags; ///< Cps feature flags + unsigned payloadSizeMaxInLib; ///< Pipeline library maxPayloadSize + unsigned attributeSizeMaxInLib; ///< Pipeline library maxAttributeSize + bool isReplay; ///< Pipeline is created for replaying + const void *pClientMetadata; ///< Pointer to (optional) client-defined data to be + /// stored inside the ELF + size_t clientMetadataSize; ///< Size (in bytes) of the client-defined data + unsigned cpsFlags; ///< Cps feature flags +#if LLPC_BUILD_GFX12 + bool disableDynamicVgpr; ///< Whether to disable dynamic VGPR mode for continuations. If not set, dVGPR mode is + /// enabled by default. + unsigned dynamicVgprBlockSize; ///< The size of the VGPR allocation granule used in dVGPR mode. +#endif GpurtOption *pGpurtOptions; ///< Array of GPURT options unsigned gpurtOptionCount; ///< Number of GPURT options bool rtIgnoreDeclaredPayloadSize; ///< Ignore the declared payload size in the shader to address issues with Proton. 
diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index e8540aff0b..fbda5a5b34 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -197,6 +197,12 @@ target_sources(LLVMlgc PRIVATE lowering/LowerRayQueryWrapper.cpp ) +#if LLPC_BUILD_GFX12 +if(LLPC_BUILD_GFX12) + target_sources(LLVMlgc PRIVATE lowering/AddBufferOperationMetadata.cpp) +endif() +#endif + # include/lgc/lowering target_sources(LLVMlgc PRIVATE include/lgc/lowering/AddLoopMetadata.h @@ -235,6 +241,12 @@ target_sources(LLVMlgc PRIVATE include/lgc/lowering/WorkaroundDsSubdwordWrite.h ) +#if LLPC_BUILD_GFX12 +if(LLPC_BUILD_GFX12) + target_sources(LLVMlgc PRIVATE include/lgc/lowering/AddBufferOperationMetadata.h) +endif() +#endif + # lgc/state target_sources(LLVMlgc PRIVATE state/Compiler.cpp diff --git a/lgc/builder/BuilderImpl.cpp b/lgc/builder/BuilderImpl.cpp index e96d2d7cd0..c4def755ad 100644 --- a/lgc/builder/BuilderImpl.cpp +++ b/lgc/builder/BuilderImpl.cpp @@ -74,6 +74,51 @@ Type *BuilderBase::getConditionallyVectorizedTy(Type *elementTy, Type *maybeVecT // @param vector2 : The float vector 2 // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateDotProduct(Value *const vector1, Value *const vector2, const Twine &instName) { +#if LLPC_BUILD_GFX12 + if (getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 12) { + // Use a chain of v_dot2_f16_f16/v_dot2_bf16_bf16 on gfx12+. + // + // Note: GFX11 has this instruction, but its precision doesn't satisfy Vulkan requirements. + // + // Note: GFX10 chips may have v_dot2_f32_f16, which we could consider generating in cases where bitexact results + // are not required. + // + // Note: v_dot2_f16_f16/v_dot2_bf16_bf16 only respects RTE mode according to HW spec. We must check the + // specified rounding mode before using it. Also, v_dot2_f16_f16/v_dot2_bf16_bf16 is not IEEE compliant + // so we must check NSZ as well. 
+ const auto fp16RoundMode = + getPipelineState()->getShaderModes()->getCommonShaderMode(m_shaderStage.value()).fp16RoundMode; + const auto vectorTy = dyn_cast(vector1->getType()); + if (vectorTy && (vectorTy->getScalarSizeInBits() == 16) && + (fp16RoundMode == FpRoundMode::DontCare || fp16RoundMode == FpRoundMode::Even) && + getFastMathFlags().noSignedZeros()) { + int compCount = vectorTy->getNumElements(); + Value *result = nullptr; + Type *basicType = getHalfTy(); + Intrinsic::AMDGCNIntrinsics inst = Intrinsic::amdgcn_fdot2_f16_f16; + if (vectorTy->getScalarType()->isBFloatTy()) { + basicType = getBFloatTy(); + inst = Intrinsic::amdgcn_fdot2_bf16_bf16; + } + + if (compCount % 2 == 0) { + result = ConstantFP::get(basicType, 0.0); + } else { + // If the component count is odd, prefer feeding the last product (odd one out) as initial value. + Value *lhs = CreateExtractElement(vector1, compCount - 1); + Value *rhs = CreateExtractElement(vector2, compCount - 1); + result = CreateFMul(lhs, rhs); + } + + for (int i = 0; i + 1 < compCount; i += 2) { + Value *lhs = CreateShuffleVector(vector1, {i, i + 1}); + Value *rhs = CreateShuffleVector(vector2, {i, i + 1}); + result = CreateIntrinsic(basicType, inst, {lhs, rhs, result}); + } + return result; + } + } +#endif Value *product = CreateFMul(vector1, vector2); if (!isa(product->getType())) @@ -254,6 +299,9 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu bool BuilderImpl::supportWaveWideBPermute(ShaderStageEnum shaderStage) const { auto gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion().major; auto supportBPermute = gfxIp == 8 || gfxIp == 9; +#if LLPC_BUILD_GFX12 + supportBPermute = supportBPermute || (gfxIp == 12); +#endif auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage); supportBPermute = supportBPermute || waveSize == 32; return supportBPermute; @@ -265,6 +313,14 @@ bool BuilderImpl::supportPermLane64Dpp() const { return 
getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Get whether the context we are building in supports permute lane var operations. +bool BuilderImpl::supportPermLaneVar() const { + return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 12; +} +#endif + // ===================================================================================================================== // Create an "if..endif" or "if..else..endif" structure. The current basic block becomes the "endif" block, and all // instructions in that block before the insert point are moved to the "if" block. The insert point is moved to diff --git a/lgc/builder/DescBuilder.cpp b/lgc/builder/DescBuilder.cpp index fbd99d5183..dc7af2016a 100644 --- a/lgc/builder/DescBuilder.cpp +++ b/lgc/builder/DescBuilder.cpp @@ -422,7 +422,16 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc, Value *stride) { sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; sqBufRsrcWord3.gfx11.oobSelect = stride ? 3 : 2; assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC); - } else { + } +#if LLPC_BUILD_GFX12 + else if (gfxIp.major == 12) { + sqBufRsrcWord3.gfx12.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx12.compressionEn = 1; + sqBufRsrcWord3.gfx12.oobSelect = stride ? 
3 : 2; + assert(sqBufRsrcWord3.u32All == 0x22014FAC || sqBufRsrcWord3.u32All == 0x32014FAC); + } +#endif + else { llvm_unreachable("Not implemented!"); } bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3); diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index 3940edd21f..280c8dab11 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -537,8 +537,30 @@ static const Intrinsic::ID ImageAtomicIntrinsicTable[][8] = { Intrinsic::amdgcn_image_atomic_fmax_3d, Intrinsic::amdgcn_image_atomic_fmax_cube, Intrinsic::amdgcn_image_atomic_fmax_1darray, Intrinsic::amdgcn_image_atomic_fmax_2darray, Intrinsic::amdgcn_image_atomic_fmax_2dmsaa, Intrinsic::amdgcn_image_atomic_fmax_2darraymsaa}, +#if LLPC_BUILD_GFX12 + {Intrinsic::amdgcn_image_atomic_add_flt_1d, Intrinsic::amdgcn_image_atomic_add_flt_2d, + Intrinsic::amdgcn_image_atomic_add_flt_3d, Intrinsic::amdgcn_image_atomic_add_flt_cube, + Intrinsic::amdgcn_image_atomic_add_flt_1darray, Intrinsic::amdgcn_image_atomic_add_flt_2darray, + Intrinsic::amdgcn_image_atomic_add_flt_2dmsaa, Intrinsic::amdgcn_image_atomic_add_flt_2darraymsaa}, +#endif }; +#if LLPC_BUILD_GFX12 +// Intrinsic ID table for GFX12 image fmin atomic +static const Intrinsic::ID ImageAtomicFMinIntrinsicTableGfx12[8] = { + Intrinsic::amdgcn_image_atomic_min_flt_1d, Intrinsic::amdgcn_image_atomic_min_flt_2d, + Intrinsic::amdgcn_image_atomic_min_flt_3d, Intrinsic::amdgcn_image_atomic_min_flt_cube, + Intrinsic::amdgcn_image_atomic_min_flt_1darray, Intrinsic::amdgcn_image_atomic_min_flt_2darray, + Intrinsic::amdgcn_image_atomic_min_flt_2dmsaa, Intrinsic::amdgcn_image_atomic_min_flt_2darraymsaa}; + +// Intrinsic ID table for GFX12 image fmax atomic +static const Intrinsic::ID ImageAtomicFMaxIntrinsicTableGfx12[8] = { + Intrinsic::amdgcn_image_atomic_max_flt_1d, Intrinsic::amdgcn_image_atomic_max_flt_2d, + Intrinsic::amdgcn_image_atomic_max_flt_3d, Intrinsic::amdgcn_image_atomic_max_flt_cube, + 
Intrinsic::amdgcn_image_atomic_max_flt_1darray, Intrinsic::amdgcn_image_atomic_max_flt_2darray, + Intrinsic::amdgcn_image_atomic_max_flt_2dmsaa, Intrinsic::amdgcn_image_atomic_max_flt_2darraymsaa}; +#endif + // ===================================================================================================================== // Convert an integer or vector of integer type to the equivalent (vector of) half/float/double // @@ -1355,6 +1377,10 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns unsigned imageDescArgIndex = 0; if (!isTexelBuffer) { // Resource descriptor. Use the image atomic instruction. +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major < 12 && atomicOp == ImageAtomicFAdd) + report_fatal_error("ImageAtomicFAdd only supported on GFX12+"); +#endif args.push_back(inputValue); if (atomicOp == AtomicOpCompareSwap) @@ -1368,6 +1394,15 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns // Get the intrinsic ID from the load intrinsic ID table, and create the intrinsic. // Rectangle image uses the same Intrinsic ID with 2D image. Intrinsic::ID intrinsicId = ImageAtomicIntrinsicTable[atomicOp][dim == DimRect ? Dim2D : dim]; +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + // The intrinsics for ImageAtomicFMin/ImageAtomicFMax are renamed from GFX12+. + if (atomicOp == ImageAtomicFMin) + intrinsicId = ImageAtomicFMinIntrinsicTableGfx12[dim == DimRect ? Dim2D : dim]; + else if (atomicOp == ImageAtomicFMax) + intrinsicId = ImageAtomicFMaxIntrinsicTableGfx12[dim == DimRect ? 
Dim2D : dim]; + } +#endif #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION >= 511095 atomicInst = CreateIntrinsic(inputValue->getType(), intrinsicId, args, nullptr, instName); #else @@ -1472,6 +1507,12 @@ Value *BuilderImpl::CreateImageQuerySamples(unsigned dim, unsigned flags, Value // Extract LAST_LEVEL (SQ_IMG_RSRC_WORD3, [19:16]) lastLevel = CreateIntrinsic(Intrinsic::amdgcn_ubfe, getInt32Ty(), {descWord3, getInt32(16), getInt32(4)}); } +#if LLPC_BUILD_GFX12 + else { + // Extract LAST_LEVEL (SQ_IMG_RSRC_WORD3, [19:15]) + lastLevel = CreateIntrinsic(Intrinsic::amdgcn_ubfe, getInt32Ty(), {descWord3, getInt32(15), getInt32(5)}); + } +#endif // Sample number = 1 << LAST_LEVEL Value *sampleNumber = CreateShl(getInt32(1), lastLevel); @@ -2352,6 +2393,25 @@ CoherentFlag BuilderImpl::getImageCoherentFlag(unsigned flags, bool isRead) { if (flags & ImageFlagLlcNoAlloc) coherent.bits.dlc = true; } +#if LLPC_BUILD_GFX12 + else { + if (flags & ImageFlagCoherent) + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + // We do not need to set SCOPE_SYS for volatile because images won't be modified by CPU. + + coherent.gfx12.th = m_pipelineState->getTemporalHint( + TH::TH_RT, isRead ? 
TemporalHintImageRead : TemporalHintImageWrite, m_shaderStage.value()); + + const bool nearNt = flags & ImageFlagCoherent; + const bool farNt = flags & ImageFlagLlcNoAlloc; + if (nearNt && farNt) + coherent.gfx12.th = TH::TH_NT; + else if (nearNt && !farNt) + coherent.gfx12.th = TH::TH_NT_RT; + else if (!nearNt && farNt) + coherent.gfx12.th = TH::TH_RT_NT; + } +#endif return coherent; } diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index e2095500f4..84924c9eca 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -1396,6 +1396,13 @@ Value *BuilderImpl::readCsBuiltIn(BuiltInKind builtIn, const Twine &instName) { case BuiltInSubgroupId: { GfxIpVersion gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion(); // From Navi21, it should load the subgroupid from sgpr initialized at wave launch. +#if LLPC_BUILD_GFX12 + if (gfxIp.major >= 12) { + Value *waveIdInSubgroup = + ShaderInputs::getInput(ShaderInput::CsWaveId, BuilderBase::get(*this), *getLgcContext()); + return waveIdInSubgroup; + } else +#endif { if (gfxIp >= GfxIpVersion({10, 3})) { Value *multiDispatchInfo = diff --git a/lgc/builder/MiscBuilder.cpp b/lgc/builder/MiscBuilder.cpp index 9b3598110c..cf25ac5e62 100644 --- a/lgc/builder/MiscBuilder.cpp +++ b/lgc/builder/MiscBuilder.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -75,6 +75,13 @@ Instruction *BuilderImpl::CreateEndPrimitive(unsigned streamId) { // ===================================================================================================================== // Create a workgroup control barrier. Instruction *BuilderImpl::CreateBarrier() { +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + CreateIntrinsic(Intrinsic::amdgcn_s_barrier_signal, {}, getInt32(WorkgroupNormalBarrierId)); + return CreateIntrinsic(Intrinsic::amdgcn_s_barrier_wait, {}, + getInt16(static_cast(WorkgroupNormalBarrierId))); + } +#endif return CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {}); } diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 6b3e4fde5f..a679f497f0 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -872,6 +872,15 @@ Value *BuilderImpl::CreateSubgroupClusteredMultiExclusive(GroupArithOp groupArit Value *isPreviousLaneValid = CreateICmpNE(preClusterMask, constZero); Value *previousLaneIndex = createFindMsb(preClusterMask); Value *previousLaneValue = nullptr; +#if LLPC_BUILD_GFX12 + // v_permLane16_var can only shuffle within clusters of 16. For log2ClusterSize == 4, v_permLanex16_var needs to be + // used to fetch a value from the other 16-row. For log2ClusterSize == 5, we need the full power of the subgroup + // shuffle. + if (log2ClusterSize < 5 && supportPermLaneVar()) { + previousLaneValue = log2ClusterSize < 4 ? 
createPermLane16Var(result, result, previousLaneIndex, false, true) + : createPermLaneX16Var(result, result, previousLaneIndex, false, true); + } else +#endif { previousLaneValue = createSubgroupShuffle(SubgroupHelperLaneState::get(std::nullopt, state.requireHelperLanes), result, previousLaneIndex, m_shaderStage.value(), instName); @@ -906,6 +915,15 @@ Value *BuilderImpl::CreateSubgroupQuadBroadcast(Value *const value, Value *const Value *result = PoisonValue::get(value->getType()); const unsigned indexBits = index->getType()->getPrimitiveSizeInBits(); +#if LLPC_BUILD_GFX12 + if (supportPermLaneVar()) { + Value *laneId = CreateSubgroupMbcnt(getInt64(UINT64_MAX), ""); + + // The gather lane pattern = (laneId & ~0x3 | index & 0x3) + Value *select = CreateOr(CreateAnd(laneId, CreateNot(getInt32(0x3))), CreateAnd(index, getInt32(0x3))); + result = createPermLane16Var(value, value, select, false, true); + } else +#endif { Value *compare = CreateICmpEQ(index, getIntN(indexBits, 0)); result = CreateSelect(compare, createDppMov(value, DppCtrl::DppQuadPerm0000, 0xF, 0xF, true), result); @@ -1586,3 +1604,55 @@ Value *BuilderImpl::CreateQuadAny(Value *const value, bool requireFullQuads, con result = createWqm(result); return result; } + +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Create a call to permute var lane within a row. +// +// @param origValue : The original value we are going to update. +// @param updateValue : The value to update with. +// @param select : Select VGPR. +// @param fetchInactive : FI mode, whether to fetch inactive lane. +// @param boundCtrl : Whether bound_ctrl is used or not. 
+Value *BuilderImpl::createPermLane16Var(Value *const origValue, Value *const updateValue, Value *const select, + bool fetchInactive, bool boundCtrl) { + auto mapFunc = [this](BuilderBase &builder, ArrayRef mappedArgs, + ArrayRef passthroughArgs) -> Value * { + return builder.CreateIntrinsic( + getInt32Ty(), Intrinsic::amdgcn_permlane16_var, + {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2]}); + }; + + return CreateMapToSimpleType(mapFunc, + { + origValue, + updateValue, + }, + {select, getInt1(fetchInactive), getInt1(boundCtrl)}); +} + +// ===================================================================================================================== +// Create a call to permute var lane across two rows. +// +// @param origValue : The original value we are going to update. +// @param updateValue : The value to update with. +// @param select : Select VGPR. +// @param fetchInactive : FI mode, whether to fetch inactive lane. +// @param boundCtrl : Whether bound_ctrl is used or not. 
+Value *BuilderImpl::createPermLaneX16Var(Value *const origValue, Value *const updateValue, Value *const select, + bool fetchInactive, bool boundCtrl) { + auto mapFunc = [this](BuilderBase &builder, ArrayRef mappedArgs, + ArrayRef passthroughArgs) -> Value * { + return builder.CreateIntrinsic( + getInt32Ty(), Intrinsic::amdgcn_permlanex16_var, + {mappedArgs[0], mappedArgs[1], passthroughArgs[0], passthroughArgs[1], passthroughArgs[2]}); + }; + + return CreateMapToSimpleType(mapFunc, + { + origValue, + updateValue, + }, + {select, getInt1(fetchInactive), getInt1(boundCtrl)}); +} +#endif diff --git a/lgc/builder/YCbCrAddressHandler.cpp b/lgc/builder/YCbCrAddressHandler.cpp index b8912956a2..53b7dbcc0e 100644 --- a/lgc/builder/YCbCrAddressHandler.cpp +++ b/lgc/builder/YCbCrAddressHandler.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2016-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2016-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -57,6 +57,9 @@ void YCbCrAddressHandler::genBaseAddress(unsigned planeCount) { Value *pipeBankXorNone = m_builder->getInt32(0); switch (m_gfxIp->major) { +#if LLPC_BUILD_GFX12 + case 12: +#endif case 11: { pipeBankXor1 = pipeBankXorNone; pipeBankXor2 = pipeBankXorNone; @@ -136,6 +139,9 @@ void YCbCrAddressHandler::genHeightAndPitch(unsigned bits, unsigned bpp, unsigne m_swizzleMode = m_regHandler->getReg(SqRsrcRegs::SwizzleMode); switch (m_gfxIp->major) { +#if LLPC_BUILD_GFX12 + case 12: +#endif case 11: case 10: { const unsigned elementBytes = bpp >> 3; diff --git a/lgc/builder/YCbCrConverter.cpp b/lgc/builder/YCbCrConverter.cpp index 7a9de464f1..4ba794de25 100644 --- a/lgc/builder/YCbCrConverter.cpp +++ b/lgc/builder/YCbCrConverter.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -385,6 +385,18 @@ void YCbCrConverter::genImgDescChroma() { m_builder->getInt32(BuilderImpl::ImgFmtGfx11::IMG_FMT_8_8_8_8_UNORM__GFX104PLUS)); break; } +#if LLPC_BUILD_GFX12 + case 12: { + isGbGrFmt = + m_builder->CreateICmpEQ(imgDataFmt, m_builder->getInt32(BuilderImpl::ImgFmtGfx12::IMG_FMT_BG_RG_UNORM)); + isBgRgFmt = + m_builder->CreateICmpEQ(imgDataFmt, m_builder->getInt32(BuilderImpl::ImgFmtGfx12::IMG_FMT_GB_GR_UNORM)); + + proxySqRsrcRegHelper.setReg(SqRsrcRegs::Format, + m_builder->getInt32(BuilderImpl::ImgFmtGfx12::IMG_FMT_8_8_8_8_UNORM)); + break; + } +#endif default: llvm_unreachable("GFX IP not supported!"); break; diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index 6832f3a5ae..dbdb2bd4b0 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -79,6 +79,11 @@ class BuilderImpl : public BuilderDefs { // Get whether the context we are building in supports permute lane 64 DPP operations. bool supportPermLane64Dpp() const; +#if LLPC_BUILD_GFX12 + // Get whether the context we are building in supports permute lane var operations. 
+ bool supportPermLaneVar() const; +#endif + // Helper method to scalarize a possibly vector unary operation llvm::Value *scalarize(llvm::Value *value, const std::function &callback); @@ -482,6 +487,14 @@ class BuilderImpl : public BuilderDefs { IMG_FMT_BG_RG_UNORM__GFX104PLUS = 86, }; +#if LLPC_BUILD_GFX12 + enum ImgFmtGfx12 { + IMG_FMT_8_8_8_8_UNORM = 42, + IMG_FMT_GB_GR_UNORM = 82, + IMG_FMT_BG_RG_UNORM = 86, + }; +#endif + static const unsigned AtomicOpCompareSwap = 1; bool m_isFmaskLoad = false; // If set true, we need load a full descriptor @@ -850,6 +863,13 @@ class BuilderImpl : public BuilderDefs { llvm::Value *const index, ShaderStageEnum shaderStage, const llvm::Twine &instName); llvm::Value *createWqm(llvm::Value *const value); + +#if LLPC_BUILD_GFX12 + llvm::Value *createPermLane16Var(llvm::Value *const origValue, llvm::Value *const updateValue, + llvm::Value *const select, bool fetchInactive, bool boundCtrl); + llvm::Value *createPermLaneX16Var(llvm::Value *const origValue, llvm::Value *const updateValue, + llvm::Value *const select, bool fetchInactive, bool boundCtrl); +#endif }; } // namespace lgc diff --git a/lgc/include/lgc/lowering/LowerBufferOperations.h b/lgc/include/lgc/lowering/LowerBufferOperations.h index f88a232756..c029689d28 100644 --- a/lgc/include/lgc/lowering/LowerBufferOperations.h +++ b/lgc/include/lgc/lowering/LowerBufferOperations.h @@ -132,6 +132,10 @@ class BufferOpLowering { const llvm::function_ref callback); llvm::Value *createLoadDesc(llvm::Value *buffAddress, bool forceRawView, bool isCompact); +#if LLPC_BUILD_GFX12 + ShaderStageEnum getMemoryInstShaderStage(llvm::Instruction *inst); +#endif + compilerutils::TypeLowering &m_typeLowering; BuilderImpl m_builder; diff --git a/lgc/include/lgc/lowering/LowerCooperativeMatrix.h b/lgc/include/lgc/lowering/LowerCooperativeMatrix.h index 02b935c9c8..9d52f5886b 100644 --- a/lgc/include/lgc/lowering/LowerCooperativeMatrix.h +++ b/lgc/include/lgc/lowering/LowerCooperativeMatrix.h @@ 
-63,6 +63,10 @@ class CooperativeMatrixTimesScalarOp; class CooperativeMatrixMulAddOp; class CooperativeMatrixPackOp; class CooperativeMatrixUnPackOp; +#if LLPC_BUILD_GFX12 +class SparsityIndexLoadOp; +class SparseCooperativeMatrixMulAddOp; +#endif } // namespace xdl // ===================================================================================================================== @@ -117,6 +121,15 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixingetTargetInfo().getGfxIpVersion().major >= 12) && + !(m_pipelineState->getOptions().disableDynamicVgpr); + } + + llvm::Function *createRetryVgprAllocFunc(llvm::FixedVectorType *sgprsTy); +#endif + bool useInitWholeWave() const; bool m_hasTs; // Whether the pipeline has tessllation shader diff --git a/lgc/include/lgc/lowering/ShaderInputs.h b/lgc/include/lgc/lowering/ShaderInputs.h index 4cfae17021..138ed65801 100644 --- a/lgc/include/lgc/lowering/ShaderInputs.h +++ b/lgc/include/lgc/lowering/ShaderInputs.h @@ -139,6 +139,9 @@ enum class ShaderInput : unsigned { // Task/CS VGPRs LocalInvocationId, // LocalInvocationId (v3i32) +#if LLPC_BUILD_GFX12 + CsWaveId, // CS wave Id +#endif Count }; diff --git a/lgc/include/lgc/state/Abi.h b/lgc/include/lgc/state/Abi.h index c090c2070d..d605255d95 100644 --- a/lgc/include/lgc/state/Abi.h +++ b/lgc/include/lgc/state/Abi.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -133,9 +133,21 @@ struct PrimShaderCbLayout { }; /// Constant buffer used by SW stream-out processing (GFX11+). 
+#if LLPC_BUILD_GFX12 +struct OrderedIdPair { + unsigned orderedWaveId; + unsigned dwordsWritten; +}; +#endif struct StreamOutControlCb { unsigned bufOffsets[MaxTransformFeedbackBuffers]; +#if LLPC_BUILD_GFX12 + // Following data are only available on GFX12+ (caused by GDS removal) + uint64_t primsNeeded[MaxGsStreams]; + uint64_t primsWritten[MaxGsStreams]; + OrderedIdPair orderedIdPair[MaxTransformFeedbackBuffers]; +#endif }; } // namespace Abi diff --git a/lgc/include/lgc/state/AbiMetadata.h b/lgc/include/lgc/state/AbiMetadata.h index 9b2b139aef..5bf69f2bc0 100644 --- a/lgc/include/lgc/state/AbiMetadata.h +++ b/lgc/include/lgc/state/AbiMetadata.h @@ -190,6 +190,10 @@ static constexpr char UserDataRegMap[] = ".user_data_reg_map"; static constexpr char ImageOp[] = ".image_op"; static constexpr char FrontendStackSize[] = ".frontend_stack_size"; static constexpr char ShaderSpillThreshold[] = ".shader_spill_threshold"; +#if LLPC_BUILD_GFX12 +static constexpr char WorkgroupRoundRobin[] = ".wg_round_robin"; +static constexpr char OutgoingVgprCount[] = ".outgoing_vgpr_count"; +#endif }; // namespace HardwareStageMetadataKey namespace ShaderMetadataKey { @@ -204,6 +208,10 @@ static constexpr char TgidYEn[] = ".tgid_y_en"; static constexpr char TgidZEn[] = ".tgid_z_en"; static constexpr char TgSizeEn[] = ".tg_size_en"; static constexpr char TidigCompCnt[] = ".tidig_comp_cnt"; +#if LLPC_BUILD_GFX12 +static constexpr char XInterleave[] = ".x_interleave"; +static constexpr char YInterleave[] = ".y_interleave"; +#endif }; // namespace ComputeRegisterMetadataKey namespace GraphicsRegisterMetadataKey { @@ -705,6 +713,9 @@ constexpr unsigned mmVGT_GS_OUT_PRIM_TYPE = 0xA29B; constexpr unsigned mmVGT_GS_OUT_PRIM_TYPE_GFX11 = 0xC266; constexpr unsigned mmSPI_SHADER_PGM_LO_GS = 0x2C88; +#if LLPC_BUILD_GFX12 +constexpr unsigned mmSPI_SHADER_PGM_LO_GS_GFX12 = 0x2C84; +#endif // Register bitfield layout. 
diff --git a/lgc/include/lgc/state/Defs.h b/lgc/include/lgc/state/Defs.h index 8f12742c7e..8cd0662e08 100644 --- a/lgc/include/lgc/state/Defs.h +++ b/lgc/include/lgc/state/Defs.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -67,6 +67,10 @@ const static char CopyShaderEntryPoint[] = "lgc.shader.COPY.main"; const static char NullFsEntryPoint[] = "lgc.shader.FS.null.main"; const static char TcsPassthroughEntryPoint[] = "lgc.shader.TCS.passthrough.main"; +#if LLPC_BUILD_GFX12 +const static char SparsityIndexLoad[] = "lgc.xdl.sparsityindex.load"; +const static char SparseCooperativeMatrixMulAdd[] = "lgc.xdl.sparseCooperativeMatrix.muladd"; +#endif } // namespace lgcName // Value for high half of address that means "use PC". 
@@ -75,6 +79,10 @@ const static unsigned HighAddrPc = ~0U; // Well-known metadata names const static char MetaNameUniform[] = "amdgpu.uniform"; +#if LLPC_BUILD_GFX12 +const static char MetaNameBufferOpStage[] = "lgc.bufferOp.stage"; +const static char MetaNameBufferOpLlc[] = "lgc.bufferOp.llc"; +#endif // Maximum count of input/output locations that a shader stage (except fragment shader outputs) is allowed to specify static const unsigned MaxInOutLocCount = 32; diff --git a/lgc/include/lgc/state/IntrinsDefs.h b/lgc/include/lgc/state/IntrinsDefs.h index d25e783bcc..690266d037 100644 --- a/lgc/include/lgc/state/IntrinsDefs.h +++ b/lgc/include/lgc/state/IntrinsDefs.h @@ -66,6 +66,11 @@ static const unsigned CopyShaderEntryArgIdxStreamOffset = 4; // Entry-point argument index for the LDS offset of current vertices in GS-VS ring static const unsigned CopyShaderEntryArgIdxVertexOffset = 8; +#if LLPC_BUILD_GFX12 +// Barrier ID of per-workgroup normal barrier (-2 is for trap barrier while 0 is for null barrier) +static const unsigned WorkgroupNormalBarrierId = -1; +#endif + // Enumerates the target for "export" instruction. 
enum ExportTarget { EXP_TARGET_MRT_0 = 0, // MRT 0..7 @@ -548,6 +553,18 @@ union SqBufRsrcWord3 { unsigned : 2; } gfx11; +#if LLPC_BUILD_GFX12 + struct { + unsigned : 12; + unsigned format : 6; + unsigned : 7; + unsigned compressionEn : 1; + unsigned : 2; + unsigned oobSelect : 2; + unsigned : 2; + } gfx12; +#endif + unsigned u32All; }; @@ -639,6 +656,11 @@ enum PolyModeType { POLY_MODE_TRIANGLES = 2, }; +#if LLPC_BUILD_GFX12 +// Enumerates scope of memory +enum MemoryScope { MEMORY_SCOPE_CU, MEMORY_SCOPE_SE, MEMORY_SCOPE_DEV, MEMORY_SCOPE_SYS }; +#endif + // Represents the coherent flag used in buffer intrinsics union CoherentFlag { struct { @@ -649,6 +671,15 @@ union CoherentFlag { unsigned : 28; } bits; +#if LLPC_BUILD_GFX12 + struct { + unsigned th : 3; // Temporal hints + unsigned scope : 2; // Scope of memory + unsigned nv : 1; // Non-volatile + unsigned swz : 1; // Swizzled buffer + unsigned : 25; + } gfx12; +#endif unsigned u32All; }; diff --git a/lgc/include/lgc/state/PipelineState.h b/lgc/include/lgc/state/PipelineState.h index 8d6d28dfe4..ad26f9b3ca 100644 --- a/lgc/include/lgc/state/PipelineState.h +++ b/lgc/include/lgc/state/PipelineState.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -441,6 +441,11 @@ class PipelineState final : public Pipeline { return m_xfbStateMetadata.streamActive[streamId]; } +#if LLPC_BUILD_GFX12 + // Get the temporal hint. 
+ unsigned getTemporalHint(unsigned th, TemporalHintOpType opType, ShaderStageEnum stage = ShaderStageEnum::Invalid); +#endif + // Set user data for a specific shader stage void setUserDataMap(ShaderStageEnum shaderStage, llvm::ArrayRef userDataValues) { m_userDataMaps[shaderStage].clear(); diff --git a/lgc/include/lgc/state/TargetInfo.h b/lgc/include/lgc/state/TargetInfo.h index 539b38c860..aaba7919ec 100644 --- a/lgc/include/lgc/state/TargetInfo.h +++ b/lgc/include/lgc/state/TargetInfo.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -126,6 +126,17 @@ struct WorkaroundFlags { }; unsigned u32All; } gfx11; + +#if LLPC_BUILD_GFX12 + union { + struct { + // Due to an issue in the DB, we cannot support ReZ in the compiler when this workaround is active. + unsigned waNoReZSupport : 1; + unsigned reserved : 31; + }; + unsigned u32All; + } gfx12; +#endif }; // ===================================================================================================================== diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 0ca7a91fa3..aa93d4c8d7 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -126,6 +126,41 @@ enum class LlvmScheduleStrategy : unsigned { MaxIlp = 2 // Maximize ILP }; +#if LLPC_BUILD_GFX12 +// Enumerate the cache policy type +enum CacheScopePolicyType { + AtmWriteUseSystemScope = 0x1, // Attributes through memory stores will use system scope and avoid occupying any + // lines in GL2. +}; + +// Handle temporal hint, a store/load occupies 4 bits. 
+enum TemporalHintOpType { + TemporalHintAtmWrite = 0, + TemporalHintImageRead = 4, + TemporalHintImageWrite = 8, + TemporalHintTessFactorWrite = 12, + TemporalHintTessRead = 16, + TemporalHintTessWrite = 20, + TemporalHintBufferRead = 24, + TemporalHintBufferWrite = 28, +}; + +// Enumerates temporal hints (note: TH_LU/TH_WB both encode 3, TH_NT_WB/TH_RESERVED both encode 7) +enum TH { + TH_RT = 0, // regular + TH_NT = 1, // non-temporal + TH_HT = 2, // high-temporal + TH_LU = 3, // last use + TH_WB = 3, // regular (CU, SE), high-temporal with write-back (MALL) + TH_NT_RT = 4, // non-temporal (CU, SE), regular (MALL) + TH_RT_NT = 5, // regular (CU, SE), non-temporal (MALL) + TH_NT_HT = 6, // non-temporal (CU, SE), high-temporal (MALL) + TH_NT_WB = 7, // non-temporal (CU, SE), high-temporal with write-back (MALL) + + TH_RESERVED = 7, // unused value for load insts +}; +#endif + // Value for shadowDescriptorTable pipeline option. static const unsigned ShadowDescriptorTableDisable = ~0U; @@ -179,13 +214,21 @@ union Options { unsigned reverseThreadGroupBufferBinding; // Binding ID of the internal buffer for reverse thread group optimization bool internalRtShaders; // Enable internal RT shader intrinsics bool enableUberFetchShader; // Enable UberShader +#if LLPC_BUILD_GFX12 + bool expertSchedulingMode; // Enable gfx12 expert scheduling mode 2. +#else bool reserved16; +#endif bool disableTruncCoordForGather; // If set, trunc_coord of sampler srd is disabled for gather4 bool enableColorExportShader; // Explicitly build color export shader, UnlinkedStageFragment elf will return extra // meta data. bool fragCoordUsesInterpLoc; // Determining fragCoord use InterpLoc bool disableSampleMask; // Disable export of sample mask from PS +#if LLPC_BUILD_GFX12 + unsigned cacheScopePolicyControl; // Control cache scope policy.
attributes-through-memory read/write is available +#else unsigned reserved20; +#endif RayTracingIndirectMode rtIndirectMode; // Ray tracing indirect mode bool enablePrimGeneratedQuery; // Whether to enable primitive generated counter bool enableFragColor; // If enabled, do frag color broadcast @@ -195,7 +238,12 @@ union Options { unsigned rtStaticPipelineFlags; // Ray tracing static pipeline flags unsigned rtTriCompressMode; // Ray tracing triangle compression mode bool useGpurt; // Whether GPURT is used +#if LLPC_BUILD_GFX12 + bool disableDynamicVgpr; // Whether to disable dynamic VGPR mode for continuations. If not set, dVGPR mode is + // enabled by default. +#else bool reserved21; +#endif bool disablePerCompFetch; // Disable per component fetch in uber fetch shader. bool maskOffNullDescriptorTypeField; // If true, mask off the type field of word3 from a null descriptor. bool vbAddressLowBitsKnown; // Use vertex buffer offset low bits from driver. @@ -203,7 +251,11 @@ union Options { bool sampleMaskExportOverridesAlphaToCoverage; // Whether to use sample mask export overriding alpha to coverage bool disableSampleCoverageAdjust; // Disable the adjustment of sample coverage bool forceNullFsDummyExport; // Force dummy export to be added for null fragment shader +#if LLPC_BUILD_GFX12 + unsigned dynamicVgprBlockSize; // The VGPR allocation granule for dynamic VGPR mode. +#else unsigned reserved22; +#endif bool dynamicTopology; // Whether primitive topology is dynamic. bool robustBufferAccess; // Enable the core robust buffer access bool reserved23; @@ -212,11 +264,25 @@ union Options { // eliminating it if the write value is 1.0. bool enableMapClipDistMask; // For OGL only, whether to remap the clip distances. unsigned clipPlaneMask; // For OGL only, defines the bitmask for enabling/disabling clip planes. +#if LLPC_BUILD_GFX12 + unsigned temporalHintControl; // Override value for temporal hint. A load/store occupies 4 bits. 
The highest bit + // of 4 bits marks whether to override temporal hint. + // Arrange from the low bit to high bit in the following order: + // TemporalHintAtmWrite,TemporalHintImageRead, TemporalHintImageWrite, + // TemporalHintTessFactorWrite, TemporalHintTessRead, TemporalHintTessWrite + // TemporalHintBufferRead, TemporalHintBufferWrite +#else unsigned reserved24; +#endif bool checkRawBufferAccessDescStride; // Check descriptor stride to workaround an issue that a strided buffer desc is // used for a raw buffer access instruction. bool padBufferSizeToNextDword; // Vulkan only, set if the driver rounds the buffer size up the next dword +#if LLPC_BUILD_GFX12 + unsigned xInterleave; // Log2 X interleave size. + unsigned yInterleave; // Log2 Y interleave size. +#else unsigned reserved26[2]; +#endif bool reserved27; }; }; @@ -333,7 +399,12 @@ union ShaderOptions { /// Aggressively mark shader loads as invariant (where it is safe to do so). InvariantLoadsOption aggressiveInvariantLoads; +#if LLPC_BUILD_GFX12 + // Enable shader round-robin mode for waves within workgroup. + bool workgroupRoundRobin; +#else bool reserved; +#endif /// Let dmask bits be fully enabled when call 'image.sample.c', for depth compare mode swizzling workaround. bool imageSampleDrefReturnsRgba; @@ -344,6 +415,11 @@ union ShaderOptions { /// Force underflow prevention for log and pow bool forceUnderflowPrevention; +#if LLPC_BUILD_GFX12 + /// Override value for temporal hint for image and buffer + unsigned temporalHintShaderControl; +#endif + /// Choose llvm's instruction scheduling strategy. 
LlvmScheduleStrategy scheduleStrategy; }; diff --git a/lgc/interface/lgc/RayTracingLibrarySummary.h b/lgc/interface/lgc/RayTracingLibrarySummary.h index 6c56d7ecf9..2cd17b76bf 100644 --- a/lgc/interface/lgc/RayTracingLibrarySummary.h +++ b/lgc/interface/lgc/RayTracingLibrarySummary.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -60,6 +60,11 @@ struct RayTracingLibrarySummary { // attributes (no AHS/IS/CHS). unsigned maxHitAttributeSize = 0; +#if LLPC_BUILD_GFX12 + // The maximum outgoing VGPR count for dynamic VGPRs. + unsigned maxOutgoingVgprCount = 0; +#endif + // Whether a kernel entry function was built for this library. bool hasKernelEntry = false; diff --git a/lgc/interface/lgc/RegStackUsage.h b/lgc/interface/lgc/RegStackUsage.h index 421ce14038..db995638bb 100644 --- a/lgc/interface/lgc/RegStackUsage.h +++ b/lgc/interface/lgc/RegStackUsage.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -75,6 +75,11 @@ class RegStackUsage { // void finalizeAndUpdate(llvm::SmallVectorImpl &elfBuffer, size_t startOffset, unsigned frontendGlobalAlignment); +#if LLPC_BUILD_GFX12 + // Get the max outgoing VGPR count. 
+ unsigned getMaxOutgoingVgprCount() const; +#endif + private: std::unique_ptr m_impl; }; diff --git a/lgc/lowering/AddBufferOperationMetadata.cpp b/lgc/lowering/AddBufferOperationMetadata.cpp new file mode 100644 index 0000000000..11916f66ae --- /dev/null +++ b/lgc/lowering/AddBufferOperationMetadata.cpp @@ -0,0 +1,180 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file AddBufferOperationMetadata.cpp + * @brief LLPC source file: contains implementation of class lgc::AddBufferOperationMetadata. + *********************************************************************************************************************** + */ +#include "lgc/lowering/AddBufferOperationMetadata.h" +#include "lgc/Builder.h" +#include "lgc/LgcDialect.h" +#include "lgc/lowering/LgcLowering.h" +#include "lgc/state/PipelineState.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "lgc-add-metadata-for-buffer-operations" + +using namespace llvm; +using namespace lgc; + +namespace lgc { + +// ===================================================================================================================== +// Executes this LGC lowering pass on the specified LLVM module. +// +// @param [in/out] function : Function that we will patch. 
+// @param [in/out] analysisManager : Analysis manager to use for this transformation +// @returns : The preserved analyses (The analyses that are still valid after this pass) +PreservedAnalyses AddBufferOperationMetadata::run(llvm::Function &function, + llvm::FunctionAnalysisManager &analysisManager) { + + const auto &moduleAnalysisManager = analysisManager.getResult(function); + m_pipelineState = + moduleAnalysisManager.getCachedResult(*function.getParent())->getPipelineState(); + + LLVM_DEBUG(dbgs() << "Run the pass Add-Buffer-Operation-Metadata\n"); + + auto stage = getShaderStage(&function); + if (!stage) + return PreservedAnalyses::all(); + + m_context = &function.getContext(); + m_stageMDNode = MDNode::get( + *m_context, {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(function.getContext()), stage.value()))}); + + static const auto visitor = llvm_dialects::VisitorBuilder() + .add(&AddBufferOperationMetadata::visitLoadInst) + .add(&AddBufferOperationMetadata::visitStoreInst) + .add(&AddBufferOperationMetadata::visitMemCpyInst) + .add(&AddBufferOperationMetadata::visitMemMoveInst) + .add(&AddBufferOperationMetadata::visitMemSetInst) + .add(&AddBufferOperationMetadata::visitLoadBufferDesc) + .add(&AddBufferOperationMetadata::visitLoadStridedBufferDesc) + .build(); + visitor.visit(*this, function); + + return PreservedAnalyses::none(); +} + +// ===================================================================================================================== +// Visits "load" instruction. +// +// @param loadInst : The instruction +void AddBufferOperationMetadata::visitLoadInst(llvm::LoadInst &loadInst) { + if (isAnyBufferPointer(loadInst.getPointerOperand())) + loadInst.setMetadata(MetaNameBufferOpStage, m_stageMDNode); +} + +// ===================================================================================================================== +// Visits "store" instruction. 
+// +// @param storeInst : The instruction +void AddBufferOperationMetadata::visitStoreInst(llvm::StoreInst &storeInst) { + if (isAnyBufferPointer(storeInst.getPointerOperand())) + storeInst.setMetadata(MetaNameBufferOpStage, m_stageMDNode); +} + +// ===================================================================================================================== +// Visits "memcpy" instruction; tags it with the shader stage if source or destination is a buffer pointer. +// +// @param memCpyInst : The memcpy instruction +void AddBufferOperationMetadata::visitMemCpyInst(llvm::MemCpyInst &memCpyInst) { + Value *const dest = memCpyInst.getArgOperand(0); + Value *const src = memCpyInst.getArgOperand(1); + if (isAnyBufferPointer(src) || isAnyBufferPointer(dest)) + memCpyInst.setMetadata(MetaNameBufferOpStage, m_stageMDNode); +} + +// ===================================================================================================================== +// Visits "memmove" instruction; tags it with the shader stage if source or destination is a buffer pointer. +// +// @param memMoveInst : The memmove instruction +void AddBufferOperationMetadata::visitMemMoveInst(llvm::MemMoveInst &memMoveInst) { + Value *const dest = memMoveInst.getArgOperand(0); + Value *const src = memMoveInst.getArgOperand(1); + if (isAnyBufferPointer(src) || isAnyBufferPointer(dest)) + memMoveInst.setMetadata(MetaNameBufferOpStage, m_stageMDNode); +} + +// ===================================================================================================================== +// Visits "memset" instruction; tags it with the shader stage if the destination is a buffer pointer. +// +// @param memSetInst : The memset instruction +void AddBufferOperationMetadata::visitMemSetInst(llvm::MemSetInst &memSetInst) { + Value *const dest = memSetInst.getArgOperand(0); + if (isAnyBufferPointer(dest)) + memSetInst.setMetadata(MetaNameBufferOpStage, m_stageMDNode); +} + +// ===================================================================================================================== +// Determine if a value is a buffer pointer.
A buffer pointer is either a BUFFER_FAT_POINTER or +// a BUFFER_STRIDED_POINTER +// +// @param value : The value to check +bool AddBufferOperationMetadata::isAnyBufferPointer(const Value *const value) { + return value->getType() == PointerType::get(*m_context, ADDR_SPACE_BUFFER_FAT_POINTER) || + value->getType() == PointerType::get(*m_context, ADDR_SPACE_BUFFER_STRIDED_POINTER); +} + +// ===================================================================================================================== +// Visits a load.buffer.desc operation +// +// @param op : the operation +void AddBufferOperationMetadata::visitLoadBufferDesc(LoadBufferDescOp &op) { + if (op.getFlags() & Builder::BufferFlagLLcNoAlloc) + addLlcMetadata(op.getDescSet(), op.getBinding(), &op); +} + +// ===================================================================================================================== +// Visits a load.strided.buffer.desc operation +// +// @param op : the operation +void AddBufferOperationMetadata::visitLoadStridedBufferDesc(LoadStridedBufferDescOp &op) { + if (op.getFlags() & Builder::BufferFlagLLcNoAlloc) + addLlcMetadata(op.getDescSet(), op.getBinding(), &op); +} + +// ===================================================================================================================== +// Add LLC metadata to the loads and stores that use a buffer descriptor, looking through GEPs +// +// @param inst : The descriptor value whose users are walked (set and binding are not read by this function) +void AddBufferOperationMetadata::addLlcMetadata(unsigned set, unsigned binding, llvm::Value *inst) { + SmallVector users(inst->users()); + while (!users.empty()) { + auto user = users.pop_back_val(); + if (auto gep = dyn_cast(user)) { + users.append(gep->user_begin(), gep->user_end()); // Walk on to the GEP's own users + } else if (auto load = dyn_cast(user)) { + load->setMetadata(MetaNameBufferOpLlc, MDNode::get(*m_context, {})); + } else if (auto store = dyn_cast(user)) { + store->setMetadata(MetaNameBufferOpLlc, MDNode::get(*m_context, {})); + } + } +} + +} // namespace lgc diff --git a/lgc/lowering/InitializeWorkgroupMemory.cpp b/lgc/lowering/InitializeWorkgroupMemory.cpp index
a50924acfd..1194393b20 100644 --- a/lgc/lowering/InitializeWorkgroupMemory.cpp +++ b/lgc/lowering/InitializeWorkgroupMemory.cpp @@ -235,7 +235,13 @@ void InitializeWorkgroupMemory::initializeWithZero(GlobalVariable *lds, BuilderB if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {}); } else { +#if LLPC_BUILD_GFX12 + builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_signal, {}, builder.getInt32(WorkgroupNormalBarrierId)); + builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_wait, {}, + builder.getInt16(static_cast(WorkgroupNormalBarrierId))); +#else llvm_unreachable("Not implemented!"); +#endif } builder.CreateFence(AtomicOrdering::Acquire, workgroupScope); } diff --git a/lgc/lowering/LgcLowering.cpp b/lgc/lowering/LgcLowering.cpp index 40727b4fd6..01aaeea3c6 100644 --- a/lgc/lowering/LgcLowering.cpp +++ b/lgc/lowering/LgcLowering.cpp @@ -37,6 +37,9 @@ #include "lgc/PassManager.h" #include "lgc/Pipeline.h" #include "lgc/builder/BuilderReplayer.h" +#if LLPC_BUILD_GFX12 +#include "lgc/lowering/AddBufferOperationMetadata.h" +#endif #include "lgc/lowering/AddLoopMetadata.h" #include "lgc/lowering/ApplyWorkarounds.h" #include "lgc/lowering/CheckShaderCache.h" @@ -201,6 +204,11 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T passMgr.addPass(LowerVertexFetch()); passMgr.addPass(LowerFragmentColorExport()); passMgr.addPass(LowerDebugPrintf()); +#if LLPC_BUILD_GFX12 + // Mark shader stage for load/store. 
+ if (pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) + passMgr.addPass(createModuleToFunctionPassAdaptor(AddBufferOperationMetadata())); +#endif passMgr.addPass(LowerDesc()); passMgr.addPass(MutateEntryPoint()); passMgr.addPass(createModuleToFunctionPassAdaptor(LowerPopsInterlock())); diff --git a/lgc/lowering/LowerBufferOperations.cpp b/lgc/lowering/LowerBufferOperations.cpp index 7ae6953789..196bdc1037 100644 --- a/lgc/lowering/LowerBufferOperations.cpp +++ b/lgc/lowering/LowerBufferOperations.cpp @@ -409,6 +409,10 @@ void BufferOpLowering::visitAtomicCmpXchgInst(AtomicCmpXchgInst &atomicCmpXchgIn CoherentFlag coherent = {}; if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 11) coherent.bits.slc = isNonTemporal ? 1 : 0; +#if LLPC_BUILD_GFX12 + else + coherent.gfx12.th = isNonTemporal ? TH::TH_NT : TH::TH_RT; +#endif Value *atomicCall; if (atomicCmpXchgInst.getPointerAddressSpace() == ADDR_SPACE_BUFFER_STRIDED_POINTER) { @@ -575,6 +579,30 @@ void BufferOpLowering::visitAtomicRMWInst(AtomicRMWInst &atomicRmwInst) { if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.slc = isNonTemporal ? 1 : 0; } +#if LLPC_BUILD_GFX12 + else { + coherent.gfx12.th = isNonTemporal ? 
TH::TH_NT : TH::TH_RT; + + SyncScope::ID id = atomicRmwInst.getSyncScopeID(); + unsigned scope = MemoryScope::MEMORY_SCOPE_CU; + if (id == SyncScope::System) { + scope = MemoryScope::MEMORY_SCOPE_SYS; + } else if (id == SyncScope::SingleThread) { + scope = MemoryScope::MEMORY_SCOPE_CU; + } else { + StringRef name = m_builder.getContext().getSyncScopeName(id).value_or(""); + if (name == "agent") + scope = MemoryScope::MEMORY_SCOPE_DEV; + else if (name == "workgroup") + scope = MemoryScope::MEMORY_SCOPE_SE; + else if (name == "wavefront") + scope = MemoryScope::MEMORY_SCOPE_CU; + else + llvm_unreachable("Invalid sync scope!"); + } + coherent.gfx12.scope = scope; + } +#endif Value *atomicCall; if (isStructBuffer) { @@ -1690,6 +1718,18 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { if (!isInvariant) coherent.bits.slc = isNonTemporal; } +#if LLPC_BUILD_GFX12 + else { + coherent.gfx12.scope = isGlc ? MemoryScope::MEMORY_SCOPE_DEV : MemoryScope::MEMORY_SCOPE_CU; + if (!isInvariant) + coherent.gfx12.th = m_pipelineState.getTemporalHint(isNonTemporal ? TH::TH_NT : TH::TH_RT, + isLoad ? TemporalHintBufferRead : TemporalHintBufferWrite, + getMemoryInstShaderStage(&inst)); + if (inst.hasMetadata(MetaNameBufferOpLlc)) { + coherent.gfx12.th = TH::TH_RT_NT; + } + } +#endif Value *indexValue = isStridedPointer ? 
pointerValues[2] : nullptr; if (isLoad) { @@ -1700,6 +1740,13 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { accessSizeAllowed = accessSize >= 4; } +#if LLPC_BUILD_GFX12 + if (!m_pipelineState.getOptions().padBufferSizeToNextDword && + m_pipelineState.getTargetInfo().getGfxIpVersion().major == 12) { + accessSizeAllowed = accessSize >= 4; + } +#endif + const bool isDivergentPtr = m_uniformityInfo.isDivergent(pointerOperand); if (isInvariant && !isDivergentDesc && accessSizeAllowed && @@ -1992,3 +2039,17 @@ Value *BufferOpLowering::createLoadDesc(Value *buffAddress, bool forceRawView, b } return descriptor; } + +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Get the shader stage +// +// @param inst : The memory operation instruction +// @returns : Return the shader stage to which the instruction belongs. +ShaderStageEnum BufferOpLowering::getMemoryInstShaderStage(llvm::Instruction *inst) { + MDNode *stageMetaNode = inst->getMetadata(MetaNameBufferOpStage); + if (stageMetaNode) + return ShaderStageEnum(mdconst::extract(stageMetaNode->getOperand(0))->getZExtValue()); + return ShaderStageEnum::Invalid; +} +#endif diff --git a/lgc/lowering/LowerCooperativeMatrix.cpp b/lgc/lowering/LowerCooperativeMatrix.cpp index 9086d255c9..b6cc9fcdcd 100644 --- a/lgc/lowering/LowerCooperativeMatrix.cpp +++ b/lgc/lowering/LowerCooperativeMatrix.cpp @@ -86,6 +86,10 @@ const auto FLOAT32 = CooperativeMatrixElementType::Float32; const auto INT8 = CooperativeMatrixElementType::Int8; const auto INT32 = CooperativeMatrixElementType::Int32; const auto BFLOAT16 = CooperativeMatrixElementType::BFloat16; +#if LLPC_BUILD_GFX12 +const auto FLOAT8 = CooperativeMatrixElementType::Float8; +const auto BFLOAT8 = CooperativeMatrixElementType::BFloat8; +#endif const auto INT4 = CooperativeMatrixElementType::Int4; static const std::map WmmaIntrinsicTable = { @@ -100,8 +104,37 @@ static 
const std::map WmmaIntrinsicTabl {Properties(INT8, INT8, INT32, INT32, 1, 0), Intrinsic::amdgcn_wmma_i32_16x16x16_iu8}, {Properties(INT4, INT4, INT32, INT32, 1, 0), Intrinsic::amdgcn_wmma_i32_16x16x16_iu4}, +#if LLPC_BUILD_GFX12 + {Properties(INT4, INT4, INT32, INT32, 2, 0), Intrinsic::amdgcn_wmma_i32_16x16x32_iu4}, + + {Properties(FLOAT8, FLOAT8, FLOAT32, FLOAT32, 1, 0), Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8}, + {Properties(FLOAT8, BFLOAT8, FLOAT32, FLOAT32, 1, 0), Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8}, + {Properties(BFLOAT8, FLOAT8, FLOAT32, FLOAT32, 1, 0), Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8}, + {Properties(BFLOAT8, BFLOAT8, FLOAT32, FLOAT32, 1, 0), Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8}, + +#endif }; +#if LLPC_BUILD_GFX12 +static const std::map SWmmaIntrinsicTable_gfx12 = { + {Properties(FLOAT16, FLOAT16, FLOAT16, FLOAT16, 1), Intrinsic::amdgcn_swmmac_f16_16x16x32_f16}, + {Properties(FLOAT16, FLOAT16, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_f16}, + + {Properties(BFLOAT16, BFLOAT16, BFLOAT16, BFLOAT16, 1), Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16}, + {Properties(BFLOAT16, BFLOAT16, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16}, + + {Properties(INT8, INT8, INT32, INT32, 1), Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8}, + + {Properties(INT4, INT4, INT32, INT32, 1), Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4}, + {Properties(INT4, INT4, INT32, INT32, 2), Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4}, + + {Properties(FLOAT8, FLOAT8, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8}, + {Properties(FLOAT8, BFLOAT8, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8}, + {Properties(BFLOAT8, FLOAT8, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8}, + {Properties(BFLOAT8, BFLOAT8, FLOAT32, FLOAT32, 1), Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8}, +}; +#endif + static Intrinsic::AMDGCNIntrinsics GetWmmaIntrinsicID(const std::map &intrinsicTable, 
CooperativeMatrixElementType typeA, CooperativeMatrixElementType typeB, @@ -159,6 +192,10 @@ void LowerCooperativeMatrix::processCoopMatrixFunction(Module &module) { .add(&LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp) .add(&LowerCooperativeMatrix::visitCooperativeMatrixPackOp) .add(&LowerCooperativeMatrix::visitCooperativeMatrixUnPackOp) +#if LLPC_BUILD_GFX12 + .add(&LowerCooperativeMatrix::visitSparsityIndexLoadOp) + .add(&LowerCooperativeMatrix::visitSparseCooperativeMatrixMulAddOp) +#endif .build(); visitor.visit(*this, module); @@ -227,7 +264,43 @@ LowerCooperativeMatrix::TypeProperties LowerCooperativeMatrix::getTypeProperties } else if (layout == CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout || layout == CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout) { props.numFlatElements = 8; - } else { + } +#if LLPC_BUILD_GFX12 + else if (layout == CooperativeMatrixLayout::Gfx12BaseLayout) { + props.numFlatElements = waveSize == 32 ? 8 : 4; + props.numMatrixElements = 8; + if (isTypeNCooperativeMatrix(elemType, 4)) { + props.numFlatElements = 4; + props.numMatrixElements = 4; + props.numMatrixWords = 1; + } else if (isTypeNCooperativeMatrix(elemType, 8)) { + props.numMatrixWords = 2; + } else if (isTypeNCooperativeMatrix(elemType, 16)) { + props.numMatrixWords = 4; + } else { + props.numMatrixWords = 8; + } + } else if (layout == CooperativeMatrixLayout::Gfx12SwizzledKX16Layout) { + if (isTypeNCooperativeMatrix(elemType, 16)) { + props.numFlatElements = waveSize == 32 ? 16 : 8; + props.numMatrixElements = 16; + props.numMatrixWords = 8; + } else if (isTypeNCooperativeMatrix(elemType, 8)) { + props.numFlatElements = waveSize == 32 ? 16 : 8; + props.numMatrixElements = 16; + props.numMatrixWords = 4; + } else if (isTypeNCooperativeMatrix(elemType, 4)) { + assert(kSize >= 32); + const unsigned kMultiplier = kSize / 32; + props.numFlatElements = (waveSize == 32 ? 
8 : 4) * kMultiplier; + props.numMatrixElements = 8 * kMultiplier; + props.numMatrixWords = 2 * kMultiplier; + } else { + llvm_unreachable("not implemented!"); + } + } +#endif + else { llvm_unreachable("Unsupported layout!"); } @@ -330,7 +403,52 @@ LowerCooperativeMatrix::computeAddressing(CooperativeMatrixLayout layout, Cooper rowOffsetInFirstVgpr = builder.CreateSelect(evenGroup, builder.getInt32(0), builder.getInt32(2)); addrInfo.macroStep = builder.getInt32(4); addrInfo.microStep = builder.getInt32(1); - } else { + } +#if LLPC_BUILD_GFX12 + else if (layout == CooperativeMatrixLayout::Gfx12BaseLayout) { + Value *baseIn32lane = builder.CreateMul(builder.CreateUDiv(threadId, builder.getInt32(32)), builder.getInt32(4)); + + if (!(isTypeNCooperativeMatrix(elemType, 16) || isTypeNCooperativeMatrix(elemType, 8)) || + isTypeNCooperativeMatrix(elemType, 4)) { + unsigned baseStride = 4; + unsigned rowOffsetStride = 8; + if (isTypeNCooperativeMatrix(elemType, 4) && isColMajor) { + baseStride = 2; + rowOffsetStride = 4; + } + // NOTE: A/B with int4 from first subv only in wave 64 mode + unsigned divisor = isTypeNCooperativeMatrix(elemType, 4) ? 
64 : 32; + + Value *baseIn32lane = + builder.CreateMul(builder.CreateUDiv(threadId, builder.getInt32(divisor)), builder.getInt32(baseStride)); + Value *laneGroupIdx = builder.CreateUDiv(threadId, builder.getInt32(16)); + Value *evenGroup = + builder.CreateICmpEQ(builder.CreateAnd(laneGroupIdx, builder.getInt32(1)), builder.getInt32(0)); + rowOffsetInFirstVgpr = builder.CreateSelect(evenGroup, baseIn32lane, + builder.CreateAdd(baseIn32lane, builder.getInt32(rowOffsetStride))); + } else { + Value *offsetIn32lane = + builder.CreateUDiv(builder.CreateSRem(threadId, builder.getInt32(32)), builder.getInt32(16)); + offsetIn32lane = builder.CreateMul(offsetIn32lane, builder.getInt32(8)); + rowOffsetInFirstVgpr = builder.CreateAdd(baseIn32lane, offsetIn32lane); + } + addrInfo.macroStep = builder.getInt32(1); + } else if (layout == CooperativeMatrixLayout::Gfx12SwizzledKX16Layout) { + unsigned rowOffsetStride = 8; // 32 elements are divided into 4 groups + unsigned macroStepStride = 16; // The first group is not next to the second group + if (isColMajor && isTypeNCooperativeMatrix(elemType, 4)) { + // i4vec2 is occupied a byte so the stride is cut in half for continuous accessing in memory + rowOffsetStride = 4; + macroStepStride = 8; + } + rowOffsetInFirstVgpr = + builder.CreateMul(builder.CreateUDiv(threadId, builder.getInt32(16)), builder.getInt32(rowOffsetStride)); + addrInfo.macroStep = (waveSize == 64 ? builder.getInt32(1) : builder.getInt32(macroStepStride)); + addrInfo.microStep = (waveSize == 64 ? builder.getInt32(0) : builder.getInt32(1)); + addrInfo.microCount = (waveSize == 64 ? 
1 : rowOffsetStride); + } +#endif + else { llvm_unreachable("This layout is not supported now."); } @@ -361,6 +479,139 @@ LowerCooperativeMatrix::computeAddressing(CooperativeMatrixLayout layout, Cooper return addrInfo; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Load contiguous elements from the specified location of the memory. +// @param layout : This is identify for factor(A/B) or accumulator(C) for 16 bit element matrix. +// @param elemType : The element type for the matrix. +// @param waveSize : Identify it's in wave32 or wave64. +// @param stride : The stride in bytes in memory between the first elements of consecutive rows (orcolumns) in the +// source data. Guaranteed to be a multiple of the matrix element size. +// @param insertPos : Where to insert the instruction +Value *LowerCooperativeMatrix::computeLoadtrBaseAddressing(CooperativeMatrixLayout layout, + CooperativeMatrixElementType elemType, int waveSize, + Value *stride, Instruction *insertPos) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(insertPos); + Value *threadId = getLaneNumber(builder); + Value *rowOffsetPerLane = nullptr; + Value *colOffsetPerLane = nullptr; + Value *base = nullptr; + (void)elemType; + assert(waveSize == 32 || waveSize == 64); + + if (layout == CooperativeMatrixLayout::Gfx12BaseLayout) { + if (isTypeNCooperativeMatrix(elemType, 16)) { + Value *rowOffsetAddEight = waveSize == 32 + ? 
builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xffff0000)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xffff0000ffff0000)); + Value *macroRowOffset = builder.CreateSelect(rowOffsetAddEight, builder.getInt32(8), builder.getInt32(0)); + Value *microRowOffset = builder.CreateSRem(threadId, builder.getInt32(8)); + rowOffsetPerLane = builder.CreateAdd(macroRowOffset, microRowOffset); + + Value *colOffsetAddEight = waveSize == 32 + ? builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xff00ff00)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xff00ff00ff00ff00)); + colOffsetPerLane = builder.CreateSelect(colOffsetAddEight, builder.getInt32(8), builder.getInt32(0)); + + } else if (isTypeNCooperativeMatrix(elemType, 8)) { + Value *rowOffsetAddEight = waveSize == 32 + ? builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xffff0000)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xffff0000ffff0000)); + Value *rowOffsetAddFour = waveSize == 32 + ? 
builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xff00ff00)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xff00ff00ff00ff00)); + Value *macroRowOffset = builder.CreateSelect(rowOffsetAddEight, builder.getInt32(8), builder.getInt32(0)); + Value *subMacroRowOffset = builder.CreateSelect(rowOffsetAddFour, builder.getInt32(4), builder.getInt32(0)); + Value *microRowOffset = builder.CreateSRem(threadId, builder.getInt32(4)); + rowOffsetPerLane = builder.CreateAdd(macroRowOffset, subMacroRowOffset); + rowOffsetPerLane = builder.CreateAdd(rowOffsetPerLane, microRowOffset); + + Value *colOffsetAddEight = waveSize == 32 + ? builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xf0f0f0f0)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xf0f0f0f0f0f0f0f0)); + colOffsetPerLane = builder.CreateSelect(colOffsetAddEight, builder.getInt32(8), builder.getInt32(0)); + } + } else { + llvm_unreachable("This layout is not supported now."); + } + + // RowMajor for MatrixB + base = builder.CreateAdd(builder.CreateMul(rowOffsetPerLane, stride), colOffsetPerLane); + return base; +} + +// ===================================================================================================================== +// Get the global_load_tr intrinsic to load the element if it's supported on hw +// @param dataPtr : The pointer to a data array. +// @param layout : This is identify for factor(A/B) or accumulator(C) for 16 bit element matrix. +// @param elemType : The element type for the matrix. +// @param numElements: The element number in one lane. +// @param isColMajor : Identify the order for the data stored in memory, col-major/row-major +// @param waveSize : Identify it's in wave32 or wave64. +// @param instName : Name to give instruction(s). 
+// @param insertPos : Where to insert the instruction +Value *LowerCooperativeMatrix::getLoadTrIntrinsic(Value *dataPtr, CooperativeMatrixLayout layout, + CooperativeMatrixElementType elemType, int numElements, + bool isColMajor, int waveSize, Value *stride, const Twine &instName, + Instruction *insertPos) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(insertPos); + + bool canUserLoadTr = + m_gfxIp.major >= 12 && (isTypeNCooperativeMatrix(elemType, 16) || isTypeNCooperativeMatrix(elemType, 8)); + + if (!canUserLoadTr) + return nullptr; + + // global_load_tr instrinsc are only used on Gfx12BaseLayout + // The basePtr for the instruction will be recalculated basing on GFX12_WMMA_Matrix_load document. + if (layout == CooperativeMatrixLayout::Gfx12BaseLayout) { + Type *trloadType = nullptr; + Value *trMatrix = nullptr; + Type *castType = nullptr; + Value *elementOffset = computeLoadtrBaseAddressing(layout, elemType, waveSize, stride, insertPos); + Value *elePtr = builder.CreateGEP(transCooperativeMatrixElementType(builder, elemType), dataPtr, elementOffset); + switch (elemType) { + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::BFloat16: + case CooperativeMatrixElementType::Int16: + castType = FixedVectorType::get(transCooperativeMatrixElementType(builder, elemType), numElements); + trloadType = FixedVectorType::get(builder.getInt16Ty(), numElements); + trMatrix = + builder.CreateIntrinsic(trloadType, Intrinsic::amdgcn_global_load_tr_b128, {elePtr}, nullptr, instName); + return builder.CreateBitCast(trMatrix, castType); + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Float8: + case CooperativeMatrixElementType::BFloat8: + case CooperativeMatrixElementType::Int4: + // Use <2 x i32>(or i32) @llvm.amdgcn.global.load.tr for load_tr then bitcase from i32 to i8 + if (waveSize == 32 && !isTypeNCooperativeMatrix(elemType, 4)) { + trloadType = FixedVectorType::get(builder.getInt32Ty(), numElements 
/ 4); + } else { + trloadType = builder.getInt32Ty(); + } + trMatrix = builder.CreateIntrinsic(trloadType, Intrinsic::amdgcn_global_load_tr_b64, {elePtr}, nullptr, instName); + castType = FixedVectorType::get(transCooperativeMatrixElementType(builder, elemType), numElements); + return builder.CreateBitCast(trMatrix, castType); + default: + // Global_load_tr intrinsic is only used for 8bit/16bit elements loading + return nullptr; + } + } + return nullptr; +} +#endif // ===================================================================================================================== // Visit "CooperativeMatrixLengthOp" instruction // @@ -385,6 +636,15 @@ void LowerCooperativeMatrix::visitCooperativeMatrixLengthOp(CooperativeMatrixLen case CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout: length = 8; break; +#if LLPC_BUILD_GFX12 + case CooperativeMatrixLayout::Gfx12BaseLayout: + length = (waveSize == 32) ? 8 : 4; + break; + case CooperativeMatrixLayout::Gfx12SwizzledKX16Layout: + length = (waveSize == 32) ? 
16 : 8; + length *= kSize / 32; + break; +#endif default: llvm_unreachable("unhandled matrix layout"); } @@ -430,6 +690,22 @@ void LowerCooperativeMatrix::visitCooperativeMatrixLoadOp(CooperativeMatrixLoadO auto props = getTypeProperties(elemType, layout, kSize); +#ifdef LLPC_BUILD_GFX12 + if ((m_gfxIp.major >= 12) && (addrSpace == ADDR_SPACE_GLOBAL)) { + // Global_load_tr can only be used for row_major@B and col_major@A under global_address_space + if (!isColMajor) { + Value *trLoadInst = getLoadTrIntrinsic(dataPtr, layout, elemType, props.numFlatElements, isColMajor, waveSize, + stride, load.getName(), &load); + if (trLoadInst) { + Value *coMatrix = convFlatVecToCoopMatrixVec(builder, trLoadInst, elemType, layout, kSize); + m_coopMatrixCalls.push_back(&load); + load.replaceAllUsesWith(coMatrix); + return; + } + } + } +#endif + bool isLoadingPackedVal = !isColMajor && elemType == CooperativeMatrixElementType::Int4; auto addrInfo = computeAddressing(layout, elemType, waveSize, stride, isColMajor, &load); Value *vecVal = PoisonValue::get(FixedVectorType::get(elemTy, props.numFlatElements)); @@ -700,10 +976,58 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOp Type *dstType = nullptr; if (dstElemType == CooperativeMatrixElementType::BFloat16) dstType = FixedVectorType::get(builder.getBFloatTy(), vecSize); +#if LLPC_BUILD_GFX12 + else if (dstElemType == CooperativeMatrixElementType::BFloat8 || + dstElemType == CooperativeMatrixElementType::Float8) { + dstType = FixedVectorType::get(builder.getFloatTy(), vecSize); + + // Dest type is float32, if it is the conversion between floats, FPTrunc needs to be changed to FPExt. 
+ if (castOp == Instruction::Instruction::FPTrunc) { + castOp = Instruction::Instruction::FPExt; + } + } +#endif else dstType = FixedVectorType::get(transCooperativeMatrixElementType(builder, dstElemType), vecSize); - if (srcElemType == CooperativeMatrixElementType::BFloat16) { +#if LLPC_BUILD_GFX12 + if (srcElemType == CooperativeMatrixElementType::Float8 || srcElemType == CooperativeMatrixElementType::BFloat8) { + assert(m_gfxIp.major >= 12 && "bf8/fp8 is only supported on gfx12+"); + // If the source is bf8/fp8, convert it float32 first, then convert dest type. + + // Use amd intrinsic convert + // llvm.amdgcn.cvt.pk.f32.bf8 + // llvm.amdgcn.cvt.pk.f32.fp8 + const Intrinsic::AMDGCNIntrinsics toF32Intrinsic = (srcElemType == CooperativeMatrixElementType::BFloat8) + ? Intrinsic::amdgcn_cvt_pk_f32_bf8 + : Intrinsic::amdgcn_cvt_pk_f32_fp8; + + assert(vecSize == 8); + source = builder.CreateBitCast(source, FixedVectorType::get(builder.getInt32Ty(), 2)); + + Type *retTy = FixedVectorType::get(builder.getFloatTy(), 2); + + // Convert the first four elements + auto element = builder.CreateExtractElement(source, uint64_t(0)); + auto elementWord0 = builder.CreateIntrinsic(retTy, toF32Intrinsic, {element, builder.getFalse()}); + auto elementWord1 = builder.CreateIntrinsic(retTy, toF32Intrinsic, {element, builder.getTrue()}); + auto element0 = builder.CreateShuffleVector(elementWord0, elementWord1, {0, 1, 2, 3}); + + // Convert the last four elements + element = builder.CreateExtractElement(source, 1); + elementWord0 = builder.CreateIntrinsic(retTy, toF32Intrinsic, {element, builder.getFalse()}); + elementWord1 = builder.CreateIntrinsic(retTy, toF32Intrinsic, {element, builder.getTrue()}); + auto element1 = builder.CreateShuffleVector(elementWord0, elementWord1, {0, 1, 2, 3}); + + source = builder.CreateShuffleVector(element0, element1, {0, 1, 2, 3, 4, 5, 6, 7}); + + // Source is converted to float32, FPExt needs to be changed to FPTrunc. 
+ if (castOp == Instruction::Instruction::FPExt) { + castOp = Instruction::Instruction::FPTrunc; + } + } else +#endif + if (srcElemType == CooperativeMatrixElementType::BFloat16) { assert(source->getType()->isIntOrIntVectorTy()); auto *bfloat16Vec = FixedVectorType::get(builder.getBFloatTy(), vecSize); source = builder.CreateBitCast(source, bfloat16Vec); @@ -798,6 +1122,37 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOp return builder.CreateBitCast(resultValue, FixedVectorType::get(builder.getInt16Ty(), vecSize)); } +#if LLPC_BUILD_GFX12 + if (dstElemType == CooperativeMatrixElementType::BFloat8 || dstElemType == CooperativeMatrixElementType::Float8) { + // Use amd intrinsic convert + // llvm.amdgcn.cvt.pk.bf8.f32 + // llvm.amdgcn.cvt.pk.fp8.f32 + const Intrinsic::AMDGCNIntrinsics toF8Intrinsic = (dstElemType == CooperativeMatrixElementType::BFloat8) + ? Intrinsic::amdgcn_cvt_pk_bf8_f32 + : Intrinsic::amdgcn_cvt_pk_fp8_f32; + + Value *i32Vec = PoisonValue::get(FixedVectorType::get(builder.getInt32Ty(), 2)); + for (unsigned idx = 0; idx < vecSize; idx += 4) { + // Low 16-bits + auto element0 = builder.CreateExtractElement(resultValue, idx); + auto element1 = builder.CreateExtractElement(resultValue, idx + 1); + auto int32 = builder.CreateIntrinsic(builder.getInt32Ty(), toF8Intrinsic, + {element0, element1, builder.getInt32(0), builder.getFalse()}); + + // High 16-bits + element0 = builder.CreateExtractElement(resultValue, idx + 2); + element1 = builder.CreateExtractElement(resultValue, idx + 3); + int32 = + builder.CreateIntrinsic(builder.getInt32Ty(), toF8Intrinsic, {element0, element1, int32, builder.getTrue()}); + + // Insert + i32Vec = builder.CreateInsertElement(i32Vec, int32, idx / 4); + } + auto f8Type = transCooperativeMatrixElementType(builder, dstElemType); + return builder.CreateBitCast(i32Vec, FixedVectorType::get(f8Type, vecSize)); + } +#endif + return resultValue; } @@ -847,6 +1202,25 @@ void 
LowerCooperativeMatrix::visitCooperativeMatrixConvertOp(CooperativeMatrixCo resultValue = cooperativeMatrixConvertInternal(castOp, source, srcElemType, dstElemType, convert.getName(), &convert); +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major == 12 && m_pipelineState->getShaderWaveSize(m_shaderStage.value()) == 64) { + if (dstElemType == CooperativeMatrixElementType::Int4) { + // Get the high half of wave64 result and combine with the low half of wave64 result to get the final result + resultValue = builder.CreateBitCast(resultValue, builder.getInt16Ty()); + resultValue = builder.CreateZExt(resultValue, builder.getInt32Ty()); + Value *permlane64 = builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_permlane64, {resultValue}); + Value *result0 = builder.CreateOr(resultValue, builder.CreateShl(permlane64, 16)); + Value *result1 = builder.CreateOr(permlane64, builder.CreateShl(resultValue, 16)); + resultValue = builder.CreateSelect(builder.CreateICmpULT(threadId, builder.getInt32(32)), result0, result1); + } else if (srcElemType == CooperativeMatrixElementType::Int4) { + // lane32~63 repeat the result of lane0~31 for int4. + Value *result0 = builder.CreateShuffleVector(resultValue, resultValue, {0, 1, 2, 3}); + Value *result1 = builder.CreateShuffleVector(resultValue, resultValue, {4, 5, 6, 7}); + resultValue = builder.CreateSelect(builder.CreateICmpULT(threadId, builder.getInt32(32)), result0, result1); + } + } +#endif + // Step 3: Some cases need change the layout due to different element types after conversion. if ((numSrcBit > numDstBit) && (srcLayout != dstLayout)) { // All these reshape interfaces will return N*packetTy. 
@@ -1634,6 +2008,39 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul StringRef instName = muladd.getName(); unsigned kMultiplier = muladd.getKMultiplier(); +#if LLPC_BUILD_GFX12 + // Gfx12: + // wave64: + // declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<4 x half>, <4 x half>, <4 x float>) + // declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<4 x i16>, <4 x i16>, <4 x float>) + // declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg) + // declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<4 x i16>, <4 x i16>, <4 x i16>, i1 immarg) + // declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 + // immarg) + // declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 + // immarg) + // declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, + // <4 x i32>, i1 immarg) + // <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32, i32, <4 x float>) + // <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32, i32, <4 x float>) + // <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32, i32, <4 x float>) + // <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32, i32, <4 x float>) + // wave32: + // declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<8 x half>, <8 x half> , <8 x float>) + // declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<8 x i16>, <8 x i16> , <8 x float>) + // declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<8 x half>, <8 x half> , <8 x half>, i1 immarg) + // declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<8 x i16>, <8 x i16> , <8 x i16>, i1 immarg) + // declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 + // immarg) + // declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, i32, i1 immarg, i32 
, <8 x i32>, i1 + // immarg) + // declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, + // <8 x i32>, i1 immarg) + // <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float> + // <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float> + // <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float> + // <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float> +#endif // Gfx11: // wave64: // declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half>, <4 x float>) @@ -1662,12 +2069,44 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul assert(matrixAType == matrixBType); if (m_gfxIp.major <= 11) factorFlatElemNum = 16; +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major == 12) { + if (waveSize == 64) { + factorFlatElemNum = 4; + matrixA = builder.CreateShuffleVector(matrixA, ArrayRef({0, 1})); + matrixB = builder.CreateShuffleVector(matrixB, ArrayRef({0, 1})); + } else { + factorFlatElemNum = 8; + matrixA = builder.CreateShuffleVector(matrixA, ArrayRef({0, 1, 2, 3})); + matrixB = builder.CreateShuffleVector(matrixB, ArrayRef({0, 1, 2, 3})); + } + } +#endif Type *factorType = FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixAType), factorFlatElemNum); matrixA = builder.CreateBitCast(matrixA, factorType); matrixB = builder.CreateBitCast(matrixB, factorType); } else if (isTypeNCooperativeMatrix(matrixAType, 8)) { +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major == 12) { + if (waveSize == 64) { + matrixA = builder.CreateExtractElement(matrixA, builder.getInt32(0)); + matrixB = builder.CreateExtractElement(matrixB, builder.getInt32(0)); + } else { + matrixA = builder.CreateShuffleVector(matrixA, ArrayRef({0, 1})); + matrixB = builder.CreateShuffleVector(matrixB, ArrayRef({0, 1})); + } + } +#endif } else 
if (isTypeNCooperativeMatrix(matrixAType, 4)) { +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major == 12) { + if (waveSize == 64 && kMultiplier > 1) { + matrixA = builder.CreateExtractElement(matrixA, builder.getInt32(0)); + matrixB = builder.CreateExtractElement(matrixB, builder.getInt32(0)); + } + } +#endif } else { llvm_unreachable("Factor element type is not supported!"); } @@ -1710,6 +2149,12 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul SmallVector args; switch (intrinsic) { +#if LLPC_BUILD_GFX12 + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8: + case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8: +#endif case Intrinsic::amdgcn_wmma_f32_16x16x16_f16: case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16: args.push_back(matrixA); @@ -1734,6 +2179,9 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul args.push_back(builder.getInt1(isSatOrOpsel)); break; case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4: +#if LLPC_BUILD_GFX12 + case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4: +#endif args.push_back(builder.getInt1(isSignedA)); args.push_back(matrixA); args.push_back(builder.getInt1(isSignedB)); @@ -1757,6 +2205,13 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul unsigned coopVeclength = cast(matrixD->getType())->getNumElements(); Type *wordTy = isUnderlyingIntegerCooperativeMatrix(matrixCType) ? builder.getInt32Ty() : builder.getFloatTy(); matrixD = builder.CreateBitCast(matrixD, FixedVectorType::get(wordTy, coopVeclength / 2)); +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) { + matrixD = waveSize == 64 ? builder.CreateShuffleVector(matrixD, PoisonValue::get(matrixD->getType()), + ArrayRef{0, 1, 2, 3}) + : matrixD; + } else +#endif { matrixD = waveSize == 64 ? 
builder.CreateShuffleVector(matrixD, PoisonValue::get(matrixD->getType()), ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}) @@ -2149,6 +2604,267 @@ Value *LowerCooperativeMatrix::getLaneNumber(BuilderBase &builder) { return result; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Visit "SparsityIndexLoadOp" instruction +// +// @param indexload: The dialect instruction to process +void LowerCooperativeMatrix::visitSparsityIndexLoadOp(SparsityIndexLoadOp &indexload) { + BuilderBase builder(*m_context); + builder.SetInsertPoint(&indexload); + Value *stride = indexload.getStride(); + Value *dataPtr = indexload.getPointer(); + auto memoryAccess = indexload.getMemoryAccess(); + + Value *threadId = getLaneNumber(builder); + auto waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage.value()); + + Value *isEvenGroup = waveSize == 32 ? builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt32Ty(), + builder.getInt32(0xffff)) + : builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), + builder.getInt64(0xffff0000ffff)); + Value *rowOffsetPerLane = builder.CreateSRem(threadId, builder.getInt32(16)); + + // TODO: For lane_N and lane_N+16, they will load same dword corresponding to the 16 entries for one row of matrix A. + // Maybe use v_perm_b32 later to get the correct bytes for each lane later. 
+ Value *offset = builder.CreateMul(rowOffsetPerLane, stride); + + // calc memoryAccess + bool isVolatile = memoryAccess & unsigned(CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); + bool isCoherent = memoryAccess & unsigned(CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); + bool isTemporal = memoryAccess & unsigned(CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); + + Value *sparseIndexPtr = builder.CreateGEP(builder.getInt32Ty(), dataPtr, offset); + Value *sparseIndexVal = builder.CreateLoad(builder.getInt32Ty(), sparseIndexPtr, isVolatile, indexload.getName()); + const unsigned addrSpace = dataPtr->getType()->getPointerAddressSpace(); + + if (isCoherent && !(addrSpace == ADDR_SPACE_LOCAL)) + cast(sparseIndexVal)->setAtomic(AtomicOrdering::Unordered); + if (isTemporal) + cast(sparseIndexVal)->setMetadata(LLVMContext::MD_nontemporal, MDNode::get(builder.getContext(), {})); + + // Lane_0: {i0_0,i0_1,i0_2,i0_3,i0_8,i0_9,i0_a,i0_b}, Lane_16:{i0_4,i0_5,i0_6,i0_7,i0_c,i0_d,i0_e,i0_f} + // When load index from i0_0 to i0_f, it needs to select the correct 16bits according lane_id saved in low 16bit. + Value *permMask = builder.CreateSelect(isEvenGroup, builder.getInt32(0x00020405), builder.getInt32(0x01030405)); + + if (waveSize == 64) { + // Lane_0:{i0_0,i0_1,i0_2,i0_3} Lane_16:{i0_8,i0_9,i0_a,i0_b} Lane_32:{i0_4,i0_5,i0_6,i0_7} + // Lane_48:{i0_c,i0_d,i0_e,i0_f} It needs to get correct 8bit index saved in low 8bit. + Value *const laneIdLessThan32 = + builder.CreateIntrinsic(Intrinsic::amdgcn_inverse_ballot, builder.getInt64Ty(), builder.getInt64(0xffffffff)); + permMask = builder.CreateSelect(laneIdLessThan32, permMask, builder.CreateShl(permMask, 8)); + } + + Value *indexValue = builder.CreateIntrinsic(Intrinsic::amdgcn_perm, builder.getInt32Ty(), + {sparseIndexVal, PoisonValue::get(sparseIndexVal->getType()), permMask}); + + // Now indexValue should be [unused_16bit | index_16bit] for wave32 or [unused_24bit | index_8bit] for wave64. 
+ m_coopMatrixCalls.push_back(&indexload); + indexload.replaceAllUsesWith(indexValue); +} + +// ===================================================================================================================== +// Visit "SparseCooperativeMatrixMulAddOp" instruction +// +// @param sparseMulAdd: The dialect instruction to process +void LowerCooperativeMatrix::visitSparseCooperativeMatrixMulAddOp(SparseCooperativeMatrixMulAddOp &sparseMulAdd) { + if (m_gfxIp.major < 12) + // Swmma is only supported after gfx12. + return; + BuilderBase builder(*m_context); + builder.SetInsertPoint(&sparseMulAdd); + + Value *matrixA = sparseMulAdd.getMatrixA(); + Value *matrixB = sparseMulAdd.getMatrixB(); + Value *matrixC = sparseMulAdd.getMatrixC(); + auto isSignedA = sparseMulAdd.getIsSignedA(); + auto isSignedB = sparseMulAdd.getIsSignedB(); + auto isSat = sparseMulAdd.getIsSat(); + auto matrixAType = sparseMulAdd.getMatrixAElemType(); + auto matrixBType = sparseMulAdd.getMatrixBElemType(); + auto matrixCType = sparseMulAdd.getMatrixCElemType(); + auto matrixDType = sparseMulAdd.getMatrixDElemType(); + Value *sparseIndex = sparseMulAdd.getSparseIndex(); + assert(sparseIndex->getType() == builder.getInt32Ty()); + unsigned kMultiplier = sparseMulAdd.getKMultiplier(); + + // clang-format off + // wave64: + // declare<4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.v4f32.i8(<4 x half>, <8 x half>, <4 x + // float>, i8) + // declare<4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.v4f32.i8(<4 x i16>, <8 x i16>, + // <4 x float>, i8) + // declare<4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.v4f16.i8(<4 x half>, <8 x half>, + // <4 x half>, i8) + // declare<4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.v4i16.i8(<4 x i16>, <8 x i16>, + // <4 x i16>,i8) + // declare<4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 immarg, i32, i1 immarg, + // <2 x i32>, <4 x i32>, i8 % Index,i1 immarg) + 
// declare<4 x i32> @llvm.amdgcn.swmmac.i32 .16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 immarg, i32, i1 immarg, i32, + // <4 x i32>, i16 % Index, i1 immarg) + // declare<4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 immarg, i32, i1 immarg, + // <2 x i32>, <4 x i32>, i16 % Index, i1 immarg) + // declare<4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, + // i8) + // declare<4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, + // i8) + // declare<4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, + // i8) + // declare<4 x float> @llvm.amdgcn.swmmac.f32 .16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, + // i8) + + // wave32: + // declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.v8f32.i16(<8 x half>, <16 x half>, <8 + // x float>, i16) + // declare<8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.v8f32.i16(<8 x i16>, <16 x i16>, + // <8 x float>, i16) + // declare<8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.v8f16.i16(<8 x half>, <16 x half>, + // <8 x half>, i16) + // declare<8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.v8i16.i16(<8 x i16>, <16 x i16>, + // <8 x i16>, i16) + // declare<8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, + // <4 x i32>, <8 x i32>,i16 % Index, i1 immarg) + // declare<8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, + // <2 x i32>, <8 x i32>, i16 % Index,i1 immarg) + // declare<8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 immarg, <2 x i32>, i1 immarg, + // <4 x i32>, <8 x i32>,i32 % Index, i1 immarg) + // declare<8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 
x i32>, + // <8 x float>, i16) + // declare<8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, + // <8 x float>, i16) + // declare<8 x float> @llvm.amdgcn.swmmac.f32 .16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, + // <8 x float>, i16) + // declare<8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, + // <8 x float>, i16) + // clang-format on + + Value *matrixD; + Value *IndexOfSparseMatrix; + unsigned waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage.value()); + + unsigned factorAFlatElemNum = 0; + unsigned factorBFlatElemNum = 0; + if (isTypeNCooperativeMatrix(matrixAType, 16)) { + if (m_gfxIp.major == 12 && waveSize == 64) { + factorAFlatElemNum = 4; + factorBFlatElemNum = 8; + matrixA = builder.CreateShuffleVector(matrixA, ArrayRef({0, 1})); + matrixB = builder.CreateShuffleVector(matrixB, ArrayRef({0, 1, 2, 3})); + } else { + factorAFlatElemNum = 8; + factorBFlatElemNum = 16; + } + Type *factorAType = + FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixAType), factorAFlatElemNum); + Type *factorBType = + FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixBType), factorBFlatElemNum); + matrixA = builder.CreateBitCast(matrixA, factorAType); + matrixB = builder.CreateBitCast(matrixB, factorBType); + } else if (isTypeNCooperativeMatrix(matrixAType, 8)) { + if (m_gfxIp.major == 12 && waveSize == 64) { + matrixA = builder.CreateExtractElement(matrixA, builder.getInt32(0)); + matrixB = builder.CreateShuffleVector(matrixB, ArrayRef({0, 1})); + } + } else if (isTypeNCooperativeMatrix(matrixAType, 4)) { + if (m_gfxIp.major == 12 && waveSize == 64) { + matrixB = builder.CreateExtractElement(matrixB, builder.getInt32(0)); + } + } else { + llvm_unreachable("Factor element type is not supported!"); + } + + unsigned matrixLength = 0; + if (isTypeNCooperativeMatrix(matrixCType, 32)) { + matrixC = + waveSize 
== 64 ? builder.CreateShuffleVector(matrixC, ArrayRef({0, 1, 2, 3}), "shuffleVector") : matrixC; + } else if (isTypeNCooperativeMatrix(matrixCType, 16)) { + matrixC = waveSize == 64 ? builder.CreateShuffleVector(matrixC, ArrayRef({0, 1}), "shuffleVector") : matrixC; + matrixLength = cast(matrixC->getType())->getNumElements(); + Type *castType = nullptr; + if (matrixCType == CooperativeMatrixElementType::BFloat16) { + // HW instructions require i16 type for bfloat16. + castType = builder.getInt16Ty(); + } else + castType = builder.getHalfTy(); + Type *accumType = FixedVectorType::get(castType, matrixLength * 2); + matrixC = builder.CreateBitCast(matrixC, accumType); + } else { + llvm_unreachable("Accumulator element type is not supported!"); + } + + Type *sparseIndexTy = + waveSize == 64 ? FixedVectorType::get(builder.getInt8Ty(), 4) : FixedVectorType::get(builder.getInt16Ty(), 2); + sparseIndex = builder.CreateBitCast(sparseIndex, sparseIndexTy); + + // TODO: indexkeyPos is set to 0 but needs to update in future. 
+ unsigned indexkeyPos = 0; + IndexOfSparseMatrix = builder.CreateExtractElement(sparseIndex, indexkeyPos); + + Intrinsic::AMDGCNIntrinsics swmmaInst = InvalidIntrinsicID; + { + swmmaInst = + GetWmmaIntrinsicID(SWmmaIntrinsicTable_gfx12, matrixAType, matrixBType, matrixCType, matrixDType, kMultiplier); + } + if (swmmaInst == InvalidIntrinsicID) + llvm_unreachable("HW intrinsics not supported!"); + + SmallVector args; + switch (swmmaInst) { + case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16: + case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8: + case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8: + args.push_back(matrixA); + args.push_back(matrixB); + args.push_back(matrixC); + args.push_back(IndexOfSparseMatrix); + break; + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8: + case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4: + case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4: + args.push_back(builder.getInt1(isSignedA)); + args.push_back(matrixA); + args.push_back(builder.getInt1(isSignedB)); + args.push_back(matrixB); + args.push_back(matrixC); + args.push_back(IndexOfSparseMatrix); + args.push_back(builder.getInt1(isSat)); + break; + default: + llvm_unreachable("Should never be called!"); + break; + } + + auto retTy = matrixC->getType(); + if (matrixCType != matrixDType) { + assert(matrixDType == CooperativeMatrixElementType::Float32 && matrixCType == CooperativeMatrixElementType::Int32); + retTy = sparseMulAdd.getResult()->getType(); + } + matrixD = builder.CreateIntrinsic(retTy, swmmaInst, args, nullptr, sparseMulAdd.getName()); + + if (isTypeNCooperativeMatrix(matrixCType, 16)) { + unsigned coopVeclength = cast(matrixD->getType())->getNumElements(); + Type *wordTy = 
isUnderlyingIntegerCooperativeMatrix(matrixCType) ? builder.getInt32Ty() : builder.getFloatTy(); + matrixD = builder.CreateBitCast(matrixD, FixedVectorType::get(wordTy, coopVeclength / 2)); +#if LLPC_BUILD_GFX12 + matrixD = waveSize == 64 ? builder.CreateShuffleVector(matrixD, PoisonValue::get(matrixD->getType()), + ArrayRef{0, 1, 2, 3}) + : matrixD; +#endif + } else { + matrixD = waveSize == 64 ? builder.CreateShuffleVector(matrixD, PoisonValue::get(matrixD->getType()), + ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}) + : matrixD; + } + m_coopMatrixCalls.push_back(&sparseMulAdd); + sparseMulAdd.replaceAllUsesWith(matrixD); +} +#endif + // ===================================================================================================================== // Visit "CooperativeRowAccLoadOp" instruction // @@ -2323,7 +3039,12 @@ void LowerCooperativeMatrix::visitCooperativeRowAccExpandOp(CooperativeRowAccExp else assert(rowAccElemType == matrixElemType); +#if LLPC_BUILD_GFX12 + assert(matrixLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout || + matrixLayout == CooperativeMatrixLayout::Gfx12BaseLayout); +#else assert(matrixLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout); +#endif auto props = getTypeProperties(matrixElemType, matrixLayout, 16); Type *flatType = FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixElemType), props.numFlatElements); @@ -2392,7 +3113,60 @@ void LowerCooperativeMatrix::visitCooperativeRowAccExpandOp(CooperativeRowAccExp DppCtrl::DppRowShare12, DppCtrl::DppRowShare14}; memcpy(expandCtrl, ctrl, sizeof(ctrl)); } - } else + } +#if LLPC_BUILD_GFX12 + else if (matrixLayout == CooperativeMatrixLayout::Gfx12BaseLayout) { + if (waveSize == 64) { + // Gfx12 Gfx12BaseLayout F32/I32@Wave64: + // VGPR/Lane . 0 . . . . 1 . . . . 15 . . . . 16 . . . . 31 + // VGPR[8]: C0_0 . . . C0_1 . . . C0_f . . . C8_0 . . . C8_f + // VGPR[9]: C1_0 . . . C1_1 . . . C1_f . . . C9_0 . . . C9_f + // VGPR[10]: C2_0 . . . C2_1 . . . C2_f . . . 
Ca_0 . . . Ca_f + // VGPR[11]: C3_0 . . . C3_1 . . . C3_f . . . Cb_0 . . . Cb_f + // VGPR/Lane . 32 . . . 33 . . . . 47 . . . . 48 . . . . 63 + // VGPR[8]: C4_0 . . . C4_1 . . . C4_f . . . Cc_0 . . . Cc_f + // VGPR[9]: C5_0 . . . C5_1 . . . C5_f . . . Cd_0 . . . Cd_f + // VGPR[10]: C6_0 . . . C6_1 . . . C6_f . . . Ce_0 . . . Ce_f + // VGPR[11]: C7_0 . . . C7_1 . . . C7_f . . . Cf_0 . . . Cf_f + // F16/I16@Wave64: + // Similar with wave32 which will only use 2 Vgprs. + // Row accumulator data is in finalized state and duplciated in each 16 lanes. + // Change row accumulator data lanes: + // 16 - 31 to [C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX, XX, XX, XX, XX, XX, XX, XX]. + // 32 - 47 to [C4, C5, C6, C7, C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX, XX, XX, XX]. + // 48 - 63 to [Cc, Cd, Ce, Cf, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX]. + shuffleCtrl[1] = DppCtrl::DppRowSl8; + shuffleCtrl[2] = DppCtrl::DppRowSl4; + shuffleCtrl[3] = DppCtrl::DppRowSl12; + expandCtrl[0] = DppCtrl::DppRowShare0; + expandCtrl[1] = DppCtrl::DppRowShare1; + expandCtrl[2] = DppCtrl::DppRowShare2; + expandCtrl[3] = DppCtrl::DppRowShare3; + } else { + // Gfx12 Gfx12BaseLayout F32/I32@Wave32: + // VGPR/Lane . 0 . . . . 1 . . . . 15 . . . . 16 . . . . 31 + // VGPR[8]: C0_0 . . . C0_1 . . . C0_f . . . C8_0 . . . C8_f + // VGPR[9]: C1_0 . . . C1_1 . . . C1_f . . . C9_0 . . . C9_f + // VGPR[10]: C2_0 . . . C2_1 . . . C2_f . . . Ca_0 . . . Ca_f + // VGPR[11]: C3_0 . . . C3_1 . . . C3_f . . . Cb_0 . . . Cb_f + // VGPR[12]: C4_0 . . . C4_1 . . . C4_f . . . Cc_0 . . . Cc_f + // VGPR[13]: C5_0 . . . C5_1 . . . C5_f . . . Cd_0 . . . Cd_f + // VGPR[14]: C6_0 . . . C6_1 . . . C6_f . . . Ce_0 . . . Ce_f + // VGPR[15]: C7_0 . . . C7_1 . . . C7_f . . . Cf_0 . . . Cf_f + // F16/I16@Wave32: + // all the elements are fully packed in GFX12 which is similar with F32/I32, which will only use 4 Vgprs in + // wave32. 
+ // Change row accumulator data lanes: + // 16 - 31 to [C8, C9, Ca, Cb, Cc, Cd, Ce, Cf, XX, XX, XX, XX, XX, XX, XX, XX]. + shuffleCtrl[1] = DppCtrl::DppRowSl8; + constexpr DppCtrl ctrl[] = {DppCtrl::DppRowShare0, DppCtrl::DppRowShare1, DppCtrl::DppRowShare2, + DppCtrl::DppRowShare3, DppCtrl::DppRowShare4, DppCtrl::DppRowShare5, + DppCtrl::DppRowShare6, DppCtrl::DppRowShare7}; + memcpy(expandCtrl, ctrl, sizeof(ctrl)); + } + } +#endif + else llvm_unreachable("unknow layout"); Value *rowAccShuffleVal = rowAccVal; @@ -2436,7 +3210,12 @@ void LowerCooperativeMatrix::visitCooperativeRowAccSumAccumulateOp(CooperativeRo auto rowAccElemType = sumAccumulate.getRowAccElemType(); auto isSigned = sumAccumulate.getIsSigned(); +#if LLPC_BUILD_GFX12 + assert(matrixLayout == CooperativeMatrixLayout::FactorMatrixLayout || + matrixLayout == CooperativeMatrixLayout::Gfx12BaseLayout); +#else assert(matrixLayout == CooperativeMatrixLayout::FactorMatrixLayout); +#endif Value *vcFlat = convCoopMatrixVecToFlatVec(builder, matrixVal, matrixElemType, matrixLayout); const unsigned numElems = cast(vcFlat->getType())->getNumElements(); @@ -2535,6 +3314,11 @@ void LowerCooperativeMatrix::visitCooperativeRowAccScalarOp(CooperativeRowAccSca assert(transCooperativeMatrixElementType(builder, elemType) == rowAccVal->getType()); assert(transCooperativeMatrixElementType(builder, elemType) == scalarVal->getType()); +#if LLPC_BUILD_GFX12 + // gfx12 row accumulator layout: + // - finalize_lane[0:15] = accumulate_lane[0:15] + accumulate_lane[16:31] + // - finalize_lane[16:31] = finalize_lane[0:15] +#endif bool needHandleAccumulateMode = accumulateMode && (m_gfxIp.major >= 12); if (needHandleAccumulateMode) { diff --git a/lgc/lowering/LowerGpuRt.cpp b/lgc/lowering/LowerGpuRt.cpp index 1556d4eae7..ad57afd135 100644 --- a/lgc/lowering/LowerGpuRt.cpp +++ b/lgc/lowering/LowerGpuRt.cpp @@ -306,6 +306,15 @@ void LowerGpuRt::visitLdsStackInit(GpurtLdsStackInitOp &inst) { m_builder->CreateGEP(m_stackTy, m_stack, 
{m_builder->getInt32(0), stackBasePerThread}), m_builder->getInt32Ty()); Value *stackAddr; +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + // stack_addr[29:15] = stack_base[15:2] + // stack_addr[14:10] = stack_index[5:0] + // Note that this relies on stackAddr being a multiple of 4, so that bits 15 and 14 are 0. + // stackAddrDw = (stackAddr >> 2) << 15. + stackAddr = m_builder->CreateShl(stackBaseAsInt, 13); + } else +#endif { // stack_addr[31:18] = stack_base[15:2] // stack_addr[17:0] = stack_index[17:0] diff --git a/lgc/lowering/LowerInOut.cpp b/lgc/lowering/LowerInOut.cpp index ec34a59bcc..f7a96b9e77 100644 --- a/lgc/lowering/LowerInOut.cpp +++ b/lgc/lowering/LowerInOut.cpp @@ -145,6 +145,9 @@ PreservedAnalyses LowerInOut::run(Module &module, ModuleAnalysisManager &analysi m_buffFormats = &BufferFormatsGfx10; break; case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif m_buffFormats = &BufferFormatsGfx11; break; default: @@ -502,6 +505,41 @@ void LowerInOut::processShader() { unsigned workgroupSizeY = mode.workgroupSizeY; unsigned workgroupSizeZ = mode.workgroupSizeZ; SwizzleWorkgroupLayout layout = calculateWorkgroupLayout(m_pipelineState, m_shaderStage.value()); +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) { + // For HW swizzle, the large-pattern unroll is basically the same Z-order pattern used for 2x2 + WorkgroupLayout swizzleWgLayout = WorkgroupLayout::Unknown; + if (layout.macroLayout == WorkgroupLayout::Unknown) + swizzleWgLayout = layout.microLayout; + else + swizzleWgLayout = layout.macroLayout; + + PalMetadata *metadata = m_pipelineState->getPalMetadata(); + if (m_pipelineState->getOptions().xInterleave != 0 || m_pipelineState->getOptions().yInterleave != 0) { + metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::XInterleave] = m_pipelineState->getOptions().xInterleave; + 
metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::YInterleave] = m_pipelineState->getOptions().yInterleave; + } else { + switch (swizzleWgLayout) { + case WorkgroupLayout::Quads: + metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::XInterleave] = 1; + metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::YInterleave] = 1; + break; + case WorkgroupLayout::SexagintiQuads: + metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::XInterleave] = 3; + metadata->getPipelineNode()[Util::Abi::PipelineMetadataKey::ComputeRegisters].getMap( + true)[Util::Abi::ComputeRegisterMetadataKey::YInterleave] = 3; + break; + default: + break; + } + } + } +#endif while (!func.use_empty()) { CallInst *reconfigCall = cast(*func.user_begin()); Value *localInvocationId = reconfigCall->getArgOperand(0); @@ -4280,7 +4318,14 @@ Value *LowerInOut::readValueFromLds(bool offChip, Type *readTy, Value *ldsOffset } else if (m_gfxIp.major == 11) { // NOTE: dlc depends on MALL NOALLOC which isn't used by now. 
coherent.bits.glc = true; - } else + } +#if LLPC_BUILD_GFX12 + else if (m_gfxIp.major >= 12) { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + coherent.gfx12.th = m_pipelineState->getTemporalHint(TH::TH_RT, TemporalHintTessRead); + } +#endif + else llvm_unreachable("Not implemented!"); for (unsigned i = 0, combineCount = 0; i < numChannels; i += combineCount) @@ -4370,6 +4415,12 @@ void LowerInOut::writeValueToLds(bool offChip, Value *writeValue, Value *ldsOffs CoherentFlag coherent = {}; if (m_gfxIp.major <= 11) coherent.bits.glc = true; +#if LLPC_BUILD_GFX12 + else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + coherent.gfx12.th = m_pipelineState->getTemporalHint(TH::TH_WB, TemporalHintTessWrite); + } +#endif for (unsigned i = 0, combineCount = 0; i < numChannels; i += combineCount) { combineCount = diff --git a/lgc/lowering/MeshTaskShader.cpp b/lgc/lowering/MeshTaskShader.cpp index 5a41847b47..505ac02b1d 100644 --- a/lgc/lowering/MeshTaskShader.cpp +++ b/lgc/lowering/MeshTaskShader.cpp @@ -1693,6 +1693,10 @@ void MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { groupCount = m_builder.CreateInsertElement(groupCount, groupCountZ, 2); CoherentFlag coherent = {}; +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_SYS; +#endif m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_raw_buffer_store, {groupCount, drawDataRingBufDesc, m_builder.getInt32(0), drawDataRingEntryOffset, @@ -1803,10 +1807,21 @@ void MeshTaskShader::lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &set // // HW requires the primitive connectivity data has the following bit layout: // +#if LLPC_BUILD_GFX12 + // Pre-GFX12: +#endif // +----------------+---------------+---------------+---------------+ // | Null Primitive | Vertex Index2 | Vertex Index1 | Vertex Index0 | // | [31] | [28:20] | [18:10] | [8:0] | // +----------------+---------------+---------------+---------------+ +#if 
LLPC_BUILD_GFX12 + // + // GFX12: + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | Null Primitive | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ +#endif // auto &meshMode = m_pipelineState->getShaderModes()->getMeshShaderMode(); Value *primitiveData = nullptr; @@ -1823,7 +1838,12 @@ void MeshTaskShader::lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &set primitiveData = m_builder.CreateShl(vertex1, 10); primitiveData = m_builder.CreateOr(primitiveData, vertex0); } else { +#if LLPC_BUILD_GFX12 + primitiveData = m_builder.CreateShl(vertex1, 9); + primitiveData = m_builder.CreateOr(primitiveData, vertex0); +#else llvm_unreachable("Not implemented!"); +#endif } } else { assert(meshMode.outputPrimitive == OutputPrimitives::Triangles); @@ -1837,7 +1857,14 @@ void MeshTaskShader::lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &set primitiveData = m_builder.CreateShl(primitiveData, 10); primitiveData = m_builder.CreateOr(primitiveData, vertex0); } else { +#if LLPC_BUILD_GFX12 + primitiveData = m_builder.CreateShl(vertex2, 9); + primitiveData = m_builder.CreateOr(primitiveData, vertex1); + primitiveData = m_builder.CreateShl(primitiveData, 9); + primitiveData = m_builder.CreateOr(primitiveData, vertex0); +#else llvm_unreachable("Not implemented!"); +#endif } } @@ -2141,6 +2168,11 @@ void MeshTaskShader::initWaveThreadInfo(Function *entryPoint) { // Task shader auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Task)->entryArgIdxs.task; +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) { + m_waveThreadInfo.waveIdInSubgroup = m_builder.CreateIntrinsic(Intrinsic::amdgcn_wave_id, {}, {}); + } else +#endif { // waveId = dispatchInfo[24:20] 
m_waveThreadInfo.waveIdInSubgroup = @@ -2216,7 +2248,15 @@ Value *MeshTaskShader::getShaderRingEntryIndex(Function *entryPoint) { workgroupIds[1] = m_builder.CreateExtractElement(workgroupId, 1); workgroupIds[2] = m_builder.CreateExtractElement(workgroupId, 2); } else { +#if LLPC_BUILD_GFX12 + // NOTE: On GFX12+, we use the intrinsics to get workgroup ID X/Y/Z instead of getting them from entry-point + // arguments. This is because the IDs are modeled by architected dispatch ID GPRs rather than normal SGPRs. + workgroupIds[0] = m_builder.CreateIntrinsic(Intrinsic::amdgcn_workgroup_id_x, {}, {}); + workgroupIds[1] = m_builder.CreateIntrinsic(Intrinsic::amdgcn_workgroup_id_y, {}, {}); + workgroupIds[2] = m_builder.CreateIntrinsic(Intrinsic::amdgcn_workgroup_id_z, {}, {}); +#else llvm_unreachable("Not implemented!"); +#endif } auto dispatchDims = getFunctionArgument(entryPoint, entryArgIdxs.dispatchDims); @@ -3026,6 +3066,11 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.glc = true; } +#if LLPC_BUILD_GFX12 + else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + } +#endif m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_struct_buffer_store, {valueToStore, m_attribRingBufDesc, m_waveThreadInfo.primOrVertexIndex, @@ -3635,7 +3680,12 @@ bool MeshTaskShader::checkNeedBarrierFlag(Function *entryPoint) { assert(getShaderStage(entryPoint) == ShaderStage::Mesh); auto module = entryPoint->getParent(); for (auto &func : module->functions()) { +#if LLPC_BUILD_GFX12 + if (func.isIntrinsic() && (func.getIntrinsicID() == Intrinsic::amdgcn_s_barrier || + func.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal)) { +#else if (func.isIntrinsic() && func.getIntrinsicID() == Intrinsic::amdgcn_s_barrier) { +#endif for (auto user : func.users()) { CallInst *const call = cast(user); if (call->getParent()->getParent() == entryPoint) @@ -3776,6 +3826,14 @@ 
void MeshTaskShader::createFenceAndBarrier() { // ===================================================================================================================== // Create LDS barrier to guarantee the synchronization of LDS operations. void MeshTaskShader::createBarrier() { +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_signal, {}, m_builder.getInt32(WorkgroupNormalBarrierId)); + m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_wait, {}, + m_builder.getInt16(static_cast(WorkgroupNormalBarrierId))); + return; + } +#endif m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {}); } diff --git a/lgc/lowering/MutateEntryPoint.cpp b/lgc/lowering/MutateEntryPoint.cpp index 55a9bef594..a8df8d2d78 100644 --- a/lgc/lowering/MutateEntryPoint.cpp +++ b/lgc/lowering/MutateEntryPoint.cpp @@ -486,6 +486,14 @@ void MutateEntryPoint::lowerAsCpsReference(cps::AsContinuationReferenceOp &asCps Value *reloc = nullptr; Function &callee = *cast(asCpsReferenceOp.getFn()); +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) { + auto funcName = callee.getName(); + std::string relocName = "_dvgpr$" + funcName.str(); + reloc = builder.CreateRelocationConstant(relocName); + } +#endif + Value *loweredReference = lgc::cps::lowerAsContinuationReference(builder, asCpsReferenceOp, reloc); assert(asCpsReferenceOp.getType()->getIntegerBitWidth() == 32); @@ -545,7 +553,17 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { } else { numShaderArg = m_cpsShaderInputCache.getTypes().size(); numUserdata = haveLocalInvocationId ? 
numShaderArg - 1 : numShaderArg; +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) { + numUserdata--; + assert(m_cpsShaderInputCache.getNames().back() == "MaxOutgoingVgprCount"); + assert(haveLocalInvocationId == (m_cpsShaderInputCache.getNames()[numShaderArg - 2] == "LocalInvocationId")); + } else { + assert(haveLocalInvocationId == (m_cpsShaderInputCache.getNames().back() == "LocalInvocationId")); + } +#else assert(haveLocalInvocationId == (m_cpsShaderInputCache.getNames().back() == "LocalInvocationId")); +#endif } // Get all the return instructions. @@ -733,6 +751,39 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { AddressExtender addressExtender(func, tailBlock); Value *jumpTarget = addressExtender.extend(addr32, builder.getInt32(HighAddrPc), builder.getPtrTy(), builder); +#if LLPC_BUILD_GFX12 + Value *numVgpr = nullptr; + if (isDynamicVgprEnabled()) { + // dVGPRs only support wave 32 mode. + assert(waveSize == 32); + // The required number of VGPR blocks minus 1 is stored in 3~5 bit of continuation reference. + numVgpr = builder.CreateAnd(targetVcr, builder.getInt32(0x38u)); + // Each block means 16 VGPRs + // numVgpr = (vcr[bit 3..5] >> 3 + 1) * 16 -> numVgpr = vcr[bit 3..5] << 1 + 16 + numVgpr = builder.CreateShl(numVgpr, 1); + numVgpr = builder.CreateAdd(numVgpr, builder.getInt32(16)); + + // Take the maximum number of VGPRs that may be live out of any shader in the pipeline into consideration. + // The number is stored in the last SGPR argument. + if (auto maxOutgoingVgprCount = cps::tryGetMaxOutgoingVgprCount(*func)) { + // NOTE: If this metadata is set, it means that this is kernel entry and it will initialize the SGPR of max + // outgoing VGPR count. + assert(!isCpsFunc); + sgprArgs.push_back(builder.getInt32(maxOutgoingVgprCount.value())); + } else { + // Max outgoing VGPR count is the last argument. 
+ assert(func->getArg(numShaderArg - 1)->getName() == "MaxOutgoingVgprCount"); + sgprArgs.push_back(func->getArg(numShaderArg - 1)); + } + numVgpr = builder.CreateBinaryIntrinsic(Intrinsic::umax, numVgpr, sgprArgs.back()); + + // Always pass %addr32, %execMask and %num_vgprs to fallback function using the last 3 SGPRs. + sgprArgs.push_back(addr32); + sgprArgs.push_back(execMask); + sgprArgs.push_back(numVgpr); + } +#endif + const DataLayout &layout = func->getParent()->getDataLayout(); SmallVector sgprI32; splitIntoI32(layout, builder, sgprArgs, sgprI32); @@ -740,6 +791,17 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { SmallVector chainArgs = {jumpTarget, execMask, sgprVec, vgprArg}; +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) { + // Bit 0 of flags set to 1 means dVGPR mode enabled + chainArgs.push_back(builder.getInt32(1)); + chainArgs.push_back(numVgpr); + chainArgs.push_back(builder.getInt32(~0u)); // fallback_exec + + auto fallbackFunc = createRetryVgprAllocFunc(cast(sgprVec->getType())); + chainArgs.push_back(fallbackFunc); + } else +#endif { // No flags chainArgs.push_back(builder.getInt32(0)); @@ -762,10 +824,74 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { .getMap(true)[Util::Abi::PipelineMetadataKey::ShaderFunctions] .getMap(true); shaderFunctions[funcName].getMap(true)[Util::Abi::HardwareStageMetadataKey::FrontendStackSize] = stackSize; +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) { + // There are 8 VGPRs reserved for amdgpu_cs_chain call. 
+ shaderFunctions[funcName].getMap(true)[Util::Abi::HardwareStageMetadataKey::OutgoingVgprCount] = + unsigned(vgprNum) + 8; + } +#endif return true; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Create a function to do retry vgpr alloc +// +// @param sgprsTy : the types of SGPRs used for llvm.amdgcn.cs.chain call +Function *lgc::MutateEntryPoint::createRetryVgprAllocFunc(FixedVectorType *sgprsTy) { + IRBuilder<> builder(*m_context); + + std::string funcName = "retry_vgpr_alloc."; + funcName += getTypeName(sgprsTy); + + // If function already exists, just return it. + if (auto func = m_module->getFunction(funcName)) + return func; + + auto funcTy = FunctionType::get(builder.getVoidTy(), {sgprsTy}, false); + auto func = Function::Create(funcTy, GlobalValue::ExternalLinkage, funcName, m_module); + func->addParamAttr(0, Attribute::InReg); + func->setCallingConv(CallingConv::AMDGPU_CS_ChainPreserve); + auto bb = BasicBlock::Create(func->getContext(), "", func); + builder.SetInsertPoint(bb); + + auto sgprs = func->getArg(0); + auto sgprCount = sgprsTy->getNumElements(); + // NOTE: %addr32, %execMask and %num_vgprs are always placed at the last of SGPRs. 
+ auto addr32 = builder.CreateExtractElement(sgprs, sgprCount - 3); + auto execMask = builder.CreateExtractElement(sgprs, sgprCount - 2); + auto numVgprs = builder.CreateExtractElement(sgprs, sgprCount - 1); + + AddressExtender addressExtender(func); + Value *jumpTarget = addressExtender.extend(addr32, builder.getInt32(HighAddrPc), builder.getPtrTy(), builder); + + // The retry function uses amdgpu_cs_chain_preserve calling convention, no VGPRs passing is required + Value *vgprs = PoisonValue::get(StructType::get(*m_context)); + + Value *chainArgs[] = {jumpTarget, execMask, sgprs, vgprs, builder.getInt32(1), numVgprs, builder.getInt32(~0u), func}; + + // Sleep a little so as not to overwhelm the instruction fetch + // TODO: Experiment and pick ideal sleep time on real hardware. + constexpr unsigned RetrySleepCount = 2; + builder.CreateIntrinsic(Intrinsic::amdgcn_s_sleep, {}, builder.getInt32(RetrySleepCount)); + + // TODO: Release extraneous VGPRs on failure so that other waves have a higher chance of making progress (may be done + // in LLVM) + + Type *chainTys[] = {builder.getPtrTy(), builder.getInt32Ty(), sgprs->getType(), vgprs->getType()}; + auto *chainCall = builder.CreateIntrinsic(Intrinsic::amdgcn_cs_chain, chainTys, chainArgs); + // Add inreg attribute for (fn, exec, sgprs). + for (unsigned arg = 0; arg < 3; arg++) + chainCall->addParamAttr(arg, Attribute::InReg); + + builder.CreateUnreachable(); + + return func; +} +#endif + // ===================================================================================================================== // Mutate the argument list of the cps function // @@ -830,6 +956,10 @@ Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fi SmallVector argAttrs; unsigned numUserdataArg = haveLocalInvocationId ? 
fixedShaderArgTys.size() - 1 : fixedShaderArgTys.size(); +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) + numUserdataArg--; +#endif for (unsigned idx = 0; idx != numUserdataArg; ++idx) argAttrs.push_back(inRegAttrSet); @@ -837,6 +967,11 @@ Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fi if (haveLocalInvocationId) argAttrs.push_back(emptyAttrSet); +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) + argAttrs.push_back(inRegAttrSet); +#endif + // %vcr attribute argAttrs.push_back(emptyAttrSet); // %csp attribute @@ -1344,6 +1479,15 @@ void MutateEntryPoint::processComputeFuncs(ShaderInputs *shaderInputs, Module &m shaderInputTys.pop_back(); shaderInputNames.pop_back(); } +#if LLPC_BUILD_GFX12 + if (isDynamicVgprEnabled()) { + // Add MaxOutgoingVgprCount as the last argument. + // NOTE: Not doing this in generateEntryPointArgTys() as `MaxOutgoingVgprCount` is not essentially a userdata, + // and it only exists in CPS functions. + shaderInputTys.push_back(Type::getInt32Ty(module.getContext())); + shaderInputNames.push_back("MaxOutgoingVgprCount"); + } +#endif m_cpsShaderInputCache.set(shaderInputTys, shaderInputNames); } newFunc = lowerCpsFunction(origFunc, m_cpsShaderInputCache.getTypes(), m_cpsShaderInputCache.getNames()); @@ -1924,9 +2068,18 @@ void MutateEntryPoint::addSpecialUserDataArgs(SmallVectorImpl &user } } +#if LLPC_BUILD_GFX12 + if (m_pipelineState->enableSwXfb() || + (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12 && m_pipelineState->enablePrimStats())) { + // NOTE: For GFX11+, the SW stream-out needs an additional special user data SGPR to store the stream-out control + // buffer address. And for GFX12+, we still need this special user data SGPR when we enable primitive statistics + // counting. This is because primitive counters in GDS are removed and are replaced by those defined in stream-out + // control buffer. 
+#else // NOTE: For GFX11+, the SW stream-out needs an additional special user data SGPR to store the stream-out control // buffer address. if (m_pipelineState->enableSwXfb()) { +#endif unsigned *controlBufPtr = nullptr; switch (m_shaderStage.value()) { diff --git a/lgc/lowering/NggPrimShader.cpp b/lgc/lowering/NggPrimShader.cpp index 17a0f1e70a..6a4c2267e7 100644 --- a/lgc/lowering/NggPrimShader.cpp +++ b/lgc/lowering/NggPrimShader.cpp @@ -632,7 +632,25 @@ Function *NggPrimShader::generate(Function *esMain, Function *gsMain, Function * vgprArgs[8]->setName("instanceId"); } } else { +#if LLPC_BUILD_GFX12 + // GS VGPRs + vgprArgs[0]->setName("primData"); + vgprArgs[1]->setName("primitiveId"); + vgprArgs[2]->setName("primDataAdjacency"); + + // ES VGPRs + if (m_hasTes) { + vgprArgs[3]->setName("tessCoordX"); + vgprArgs[4]->setName("tessCoordY"); + vgprArgs[5]->setName("relPatchId"); + vgprArgs[6]->setName("patchId"); + } else { + vgprArgs[3]->setName("vertexId"); + vgprArgs[4]->setName("instanceId"); + } +#else llvm_unreachable("Not implemented!"); +#endif } // Setup LDS layout @@ -862,7 +880,25 @@ FunctionType *NggPrimShader::getPrimShaderType(uint64_t &inRegMask) { argTys.push_back(m_builder.getInt32Ty()); // Instance ID } } else { +#if LLPC_BUILD_GFX12 + // GS VGPRs + argTys.push_back(m_builder.getInt32Ty()); // Primitive connectivity data + argTys.push_back(m_builder.getInt32Ty()); // Primitive ID (primitive based) + argTys.push_back(m_builder.getInt32Ty()); // Primitive connectivity data (adjacency) + + // ES VGPRs + if (m_hasTes) { + argTys.push_back(m_builder.getFloatTy()); // X of TessCoord (U) + argTys.push_back(m_builder.getFloatTy()); // Y of TessCoord (V) + argTys.push_back(m_builder.getInt32Ty()); // Relative patch ID + argTys.push_back(m_builder.getInt32Ty()); // Patch ID + } else { + argTys.push_back(m_builder.getInt32Ty()); // Vertex ID + argTys.push_back(m_builder.getInt32Ty()); // Instance ID + } +#else llvm_unreachable("Not implemented!"); +#endif 
} return FunctionType::get(m_builder.getVoidTy(), argTys, false); @@ -928,6 +964,34 @@ void NggPrimShader::calcStreamOutControlCbOffsets() { 4; } } + +#if LLPC_BUILD_GFX12 + // Following calculations are only available on GFX12+ (caused by GDS removal) + if (m_gfxIp.major >= 12) { + for (unsigned i = 0; i < MaxGsStreams; ++i) { + m_streamOutControlCbOffsets.primsNeeded[i] = (offsetof(Util::Abi::StreamOutControlCb, primsNeeded[0]) + + sizeof(Util::Abi::StreamOutControlCb::primsNeeded[0]) * i) / + 4; + } + + for (unsigned i = 0; i < MaxGsStreams; ++i) { + m_streamOutControlCbOffsets.primsWritten[i] = (offsetof(Util::Abi::StreamOutControlCb, primsWritten[0]) + + sizeof(Util::Abi::StreamOutControlCb::primsWritten[0]) * i) / + 4; + } + + if (m_pipelineState->enableSwXfb()) { + const unsigned orderedIdPairOffset = offsetof(Util::Abi::StreamOutControlCb, orderedIdPair[0]); + const unsigned orderedIdPairSize = sizeof(Util::Abi::OrderedIdPair); + for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { + m_streamOutControlCbOffsets.orderedIdPair[i].orderedWaveId = + (orderedIdPairOffset + orderedIdPairSize * i + offsetof(Util::Abi::OrderedIdPair, orderedWaveId)) / 4; + m_streamOutControlCbOffsets.orderedIdPair[i].dwordsWritten = + (orderedIdPairOffset + orderedIdPairSize * i + offsetof(Util::Abi::OrderedIdPair, dwordsWritten)) / 4; + } + } + } +#endif } // ===================================================================================================================== @@ -966,7 +1030,11 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { if (m_gfxIp.major <= 11) primitiveId = vgprArgs[2]; else +#if LLPC_BUILD_GFX12 + primitiveId = vgprArgs[1]; +#else llvm_unreachable("Not implemented!"); +#endif // // For pass-through mode, the processing is something like this: @@ -1037,10 +1105,27 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { // Primitive connectivity data have such layout: // +#if LLPC_BUILD_GFX12 + // Pre-GFX12: 
+#endif // +----------------+---------------+---------------+---------------+ // | Null Primitive | Vertex Index2 | Vertex Index1 | Vertex Index0 | // | [31] | [28:20] | [18:10] | [8:0] | // +----------------+---------------+---------------+---------------+ +#if LLPC_BUILD_GFX12 + // + // GFX12 (from GE): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | GS Instance ID | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31:27] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // + // GFX12 (to PA): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | Null Primitive | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ +#endif // Record relative vertex indices if (m_gfxIp.major <= 11) { @@ -1048,7 +1133,13 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { m_nggInputs.vertexIndex1 = createUBfe(primData, 10, 9); m_nggInputs.vertexIndex2 = createUBfe(primData, 20, 9); } else { +#if LLPC_BUILD_GFX12 + m_nggInputs.vertexIndex0 = createUBfe(primData, 0, 8); + m_nggInputs.vertexIndex1 = createUBfe(primData, 9, 8); + m_nggInputs.vertexIndex2 = createUBfe(primData, 18, 8); +#else llvm_unreachable("Not implemented!"); +#endif } // Distribute primitive ID if needed @@ -1221,7 +1312,21 @@ void NggPrimShader::buildPrimShader(Function *primShader) { instanceId = vgprArgs[8]; } } else { +#if LLPC_BUILD_GFX12 + primitiveId = vgprArgs[1]; + + if (m_hasTes) { + tessCoordX = vgprArgs[3]; + tessCoordY = vgprArgs[4]; + relPatchId = vgprArgs[5]; + patchId = vgprArgs[6]; + } 
else { + vertexId = vgprArgs[3]; + instanceId = vgprArgs[4]; + } +#else llvm_unreachable("Not implemented!"); +#endif } // @@ -1390,13 +1495,28 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // Record primitive shader table address info m_nggInputs.primShaderTableAddr = std::make_pair(primShaderTableAddrLow, primShaderTableAddrHigh); +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) { + // NOTE: From GFX12+, GE will always send the primitive connectivity data to us (the highest 5 bits are GS + // instance ID, which is not valid when API GS is absent). We can record this data and use it + // when exporting primitive to PA without reconstructing it like what we have done on pre-GFX12. + m_nggInputs.primData = createUBfe(vgprArgs[0], 0, 27); + } +#endif + // Record vertex indices if (m_gfxIp.major <= 11) { m_nggInputs.vertexIndex0 = createUBfe(vgprArgs[0], 0, 16); m_nggInputs.vertexIndex1 = createUBfe(vgprArgs[0], 16, 16); m_nggInputs.vertexIndex2 = createUBfe(vgprArgs[1], 0, 16); } else { +#if LLPC_BUILD_GFX12 + m_nggInputs.vertexIndex0 = createUBfe(vgprArgs[0], 0, 8); + m_nggInputs.vertexIndex1 = createUBfe(vgprArgs[0], 9, 8); + m_nggInputs.vertexIndex2 = createUBfe(vgprArgs[0], 18, 8); +#else llvm_unreachable("Not implemented!"); +#endif } vertexItemOffset = m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(esGsRingItemSize)); @@ -2874,6 +2994,11 @@ void NggPrimShader::loadStreamOutBufferInfo(Value *userData) { assert(m_pipelineState->enableSwXfb() || m_pipelineState->enablePrimStats()); if (m_pipelineState->enablePrimStats() && !m_pipelineState->enableSwXfb() && m_gfxIp.major <= 11) { +#if LLPC_BUILD_GFX12 + // NOTE: For pre-GFX12, if we only want to do primitive statistics counting (no SW XFB), there is no need of load + // stream-out buffer info. The primitive counters are in GDS. For GFX12+, GDS is removed and the counters are + // defined in stream-out control buffer. We still have to load the info. 
+#endif return; } @@ -3200,10 +3325,27 @@ void NggPrimShader::exportPrimitive(Value *primitiveCulled) { // Primitive connectivity data have such layout: // +#if LLPC_BUILD_GFX12 + // pre-GFX12: +#endif // +----------------+---------------+---------------+---------------+ // | Null Primitive | Vertex Index2 | Vertex Index1 | Vertex Index0 | // | [31] | [28:20] | [18:10] | [8:0] | // +----------------+---------------+---------------+---------------+ +#if LLPC_BUILD_GFX12 + // + // GFX12 (from GE): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | GS Instance ID | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31:27] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // + // GFX12 (to PA): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | Null Primitive | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ +#endif Value *primData = nullptr; if (m_gfxIp.major <= 11) { primData = m_builder.CreateShl(vertexIndex2, 10); @@ -3212,7 +3354,21 @@ void NggPrimShader::exportPrimitive(Value *primitiveCulled) { primData = m_builder.CreateShl(primData, 10); primData = m_builder.CreateOr(primData, vertexIndex0); } else { +#if LLPC_BUILD_GFX12 + if (m_compactVertex) { + primData = m_builder.CreateShl(vertexIndex2, 9); + primData = m_builder.CreateOr(primData, vertexIndex1); + + primData = m_builder.CreateShl(primData, 9); + primData = m_builder.CreateOr(primData, vertexIndex0); + } else { + // NOTE: If vertex compaction is disabled, we can use the recorded primitive connectivity data 
straightforwardly + // (sent by GE) without reconstructing it from relative vertex indices. + primData = m_nggInputs.primData; + } +#else llvm_unreachable("Not implemented!"); +#endif } if (primitiveCulled) @@ -3270,10 +3426,27 @@ void NggPrimShader::exportPrimitiveWithGs(Value *startingVertexIndex) { // Primitive connectivity data have such layout: // +#if LLPC_BUILD_GFX12 + // pre-GFX12: +#endif // +----------------+---------------+---------------+---------------+ // | Null Primitive | Vertex Index2 | Vertex Index1 | Vertex Index0 | // | [31] | [28:20] | [18:10] | [8:0] | // +----------------+---------------+---------------+---------------+ +#if LLPC_BUILD_GFX12 + // + // GFX12 (from GE): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | GS Instance ID | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31:27] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // + // GFX12 (to PA): + // +----------------+------------+---------------+------------+---------------+------------+---------------+ + // | Null Primitive | Edge Flag2 | Vertex Index2 | Edge Flag1 | Vertex Index1 | Edge Flag0 | Vertex Index0 | + // | [31] | [26] | [25:18] | [17] | [16:9] | [8] | [7:0] | + // +----------------+------------+---------------+------------+---------------+------------+---------------+ +#endif Value *newPrimData = nullptr; const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); @@ -3287,7 +3460,11 @@ void NggPrimShader::exportPrimitiveWithGs(Value *startingVertexIndex) { if (m_gfxIp.major <= 11) newPrimData = m_builder.CreateOr(m_builder.CreateShl(vertexIndex1, 10), vertexIndex0); else +#if LLPC_BUILD_GFX12 + newPrimData = m_builder.CreateOr(m_builder.CreateShl(vertexIndex1, 9), vertexIndex0); +#else llvm_unreachable("Not 
implemented!"); +#endif break; } case OutputPrimitives::TriangleStrip: { @@ -3321,7 +3498,12 @@ void NggPrimShader::exportPrimitiveWithGs(Value *startingVertexIndex) { m_builder.CreateShl(m_builder.CreateOr(m_builder.CreateShl(vertexIndex2, 10), vertexIndex1), 10), vertexIndex0); } else { +#if LLPC_BUILD_GFX12 + newPrimData = m_builder.CreateOr( + m_builder.CreateShl(m_builder.CreateOr(m_builder.CreateShl(vertexIndex2, 9), vertexIndex1), 9), vertexIndex0); +#else llvm_unreachable("Not implemented!"); +#endif } break; } @@ -3473,7 +3655,19 @@ void NggPrimShader::runEs(ArrayRef args) { instanceId = vgprArgs[8]; } } else { +#if LLPC_BUILD_GFX12 + if (m_hasTes) { + tessCoordX = vgprArgs[3]; + tessCoordY = vgprArgs[4]; + relPatchId = vgprArgs[5]; + patchId = vgprArgs[6]; + } else { + vertexId = vgprArgs[3]; + instanceId = vgprArgs[4]; + } +#else llvm_unreachable("Not implemented!"); +#endif } SmallVector esArgs; @@ -3701,7 +3895,23 @@ void NggPrimShader::runGs(ArrayRef args) { // purposes according to GE-SPI interface. 
invocationId = m_builder.CreateAnd(vgprArgs[3], m_builder.getInt32(0xFF)); } else { +#if LLPC_BUILD_GFX12 + const auto esGsRingItemSize = m_builder.getInt32( + m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig.esGsRingItemSize); + + esGsOffset0 = m_builder.CreateMul(createUBfe(vgprArgs[0], 0, 8), esGsRingItemSize); + esGsOffset1 = m_builder.CreateMul(createUBfe(vgprArgs[0], 9, 8), esGsRingItemSize); + esGsOffset2 = m_builder.CreateMul(createUBfe(vgprArgs[0], 18, 8), esGsRingItemSize); + esGsOffset3 = m_builder.CreateMul(createUBfe(vgprArgs[2], 0, 8), esGsRingItemSize); + esGsOffset4 = m_builder.CreateMul(createUBfe(vgprArgs[2], 9, 8), esGsRingItemSize); + esGsOffset5 = m_builder.CreateMul(createUBfe(vgprArgs[2], 18, 8), esGsRingItemSize); + + primitiveId = vgprArgs[1]; + // NOTE: For GFX12, GS invocation ID is stored in highest 5 bits ([31:27]) + invocationId = createUBfe(vgprArgs[0], 27, 5); +#else llvm_unreachable("Not implemented!"); +#endif } SmallVector gsArgs; @@ -6496,7 +6706,19 @@ void NggPrimShader::collectExports(ArrayRef args, Function *&fromFun instanceId = vgprArgs[8]; } } else { +#if LLPC_BUILD_GFX12 + if (m_hasTes) { + tessCoordX = vgprArgs[3]; + tessCoordY = vgprArgs[4]; + relPatchId = vgprArgs[5]; + patchId = vgprArgs[6]; + } else { + vertexId = vgprArgs[3]; + instanceId = vgprArgs[4]; + } +#else llvm_unreachable("Not implemented!"); +#endif } if (m_compactVertex) { @@ -6802,6 +7024,19 @@ void NggPrimShader::exportAttributes(const SmallVectorImpl &attrib CoherentFlag coherent = {}; if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.glc = true; +#if LLPC_BUILD_GFX12 + } else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + coherent.gfx12.th = TH::TH_NT_WB; + + unsigned cachePolicy = m_pipelineState->getOptions().cacheScopePolicyControl; + if (cachePolicy & CacheScopePolicyType::AtmWriteUseSystemScope) { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_SYS; + 
coherent.gfx12.th = TH::TH_WB; + } + coherent.gfx12.th = + m_pipelineState->getTemporalHint(coherent.gfx12.th, TemporalHintOpType::TemporalHintAtmWrite); +#endif } m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_struct_buffer_store, @@ -6949,6 +7184,11 @@ void NggPrimShader::processSwXfb(ArrayRef args, const SmallVectorImp if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.glc = true; coherent.bits.slc = true; +#if LLPC_BUILD_GFX12 + } else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + coherent.gfx12.th = TH::TH_HT; +#endif } // vertexOffset = (threadIdInSubgroup * vertsPerPrim + vertexIndex) * xfbStride @@ -7374,6 +7614,11 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args, const SmallVec if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.glc = true; coherent.bits.slc = true; +#if LLPC_BUILD_GFX12 + } else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; + coherent.gfx12.th = TH::TH_HT; +#endif } // vertexOffset = (threadIdInSubgroup * outVertsPerPrim + vertexIndex) * xfbStride @@ -7619,7 +7864,329 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { return; } +#if LLPC_BUILD_GFX12 + // GFX12+ SW emulated stream-out with global ordered atomic add support + assert(m_gfxIp.major >= 12); + + // + // The processing is something like this: + // + // PREPARE_XFB() { + // if (threadIdInSubgroup < MaxGsStreams && streamActive) + // numPrimsToWrite[X] = primCountInSubgroup[X] + // + // if (threadIdInSubgroup < MaxTransformFeedbackBuffers && bufferActive) { + // Load ordered ID pair from stream-out control buffer and try to increment dwordsWritten[X] + // while (orderedWaveId != readyOrderedWaveId) { + // Sleep for a while + // Reload ordered ID pair from stream-out control buffer and try to increment dwordsWritten[X] + // } + // + // Calculate primsToWrite and dwordsToWrite + // Revise dwordsWritten[X] + // Store XFB statistics info to LDS + // } 
+ // + // if (threadIdInSubgroup < MaxGsStreams && streamActive) + // Increment primsNeeded[X] and primsWritten[X] + // + unsigned numActiveBuffers = 0; + unsigned activeBufferMask = 0; + for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { + if (!bufferActive[i]) + continue; // XFB buffer is inactive + + ++numActiveBuffers; + activeBufferMask |= (1U << i); + } + + unsigned numActiveStreams = 0; + unsigned activeStreamMask = 0; + for (unsigned i = 0; i < MaxGsStreams; ++i) { + if (!m_pipelineState->isVertexStreamActive(i)) + continue; // Vertex stream is inactive + + ++numActiveStreams; + activeStreamMask |= (1U << i); + } + + const unsigned waveSize = m_pipelineState->getShaderWaveSize( + m_hasGs ? ShaderStage::Geometry : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)); + + const unsigned xfbStatsRegionStart = getLdsRegionStart(PrimShaderLdsRegion::XfbStats); + + auto insertBlock = m_builder.GetInsertBlock(); + auto primShader = insertBlock->getParent(); + + auto initPrimitivesToWriteBlock = createBlock(primShader, ".initPrimitivesToWrite"); + initPrimitivesToWriteBlock->moveAfter(insertBlock); + + auto endInitPrimitivesToWriteBlock = createBlock(primShader, ".endInitPrimitivesToWrite"); + endInitPrimitivesToWriteBlock->moveAfter(initPrimitivesToWriteBlock); + + auto waveOrderingHeaderBlock = createBlock(primShader, ".waveOrderingHeader"); + waveOrderingHeaderBlock->moveAfter(endInitPrimitivesToWriteBlock); + + auto waveOrderingBodyBlock = createBlock(primShader, ".waveOrderingBody"); + waveOrderingBodyBlock->moveAfter(waveOrderingHeaderBlock); + + auto endWaveOrderingBlock = createBlock(primShader, ".endWaveOrdering"); + endWaveOrderingBlock->moveAfter(waveOrderingBodyBlock); + + auto checkUpdatePrimitiveCounterBlock = createBlock(primShader, ".checkUpdatePrimitiveCounter"); + checkUpdatePrimitiveCounterBlock->moveAfter(endWaveOrderingBlock); + + auto updatePrimitiveCounterBlock = createBlock(primShader, ".updatePrimitiveCounter"); + 
updatePrimitiveCounterBlock->moveAfter(checkUpdatePrimitiveCounterBlock); + + auto endUpdatePrimitiveCounterBlock = createBlock(primShader, ".endUpdatePrimitiveCounter"); + endUpdatePrimitiveCounterBlock->moveAfter(updatePrimitiveCounterBlock); + + // Continue to construct insert block + Value *validStream = nullptr; + Value *numPrimsInStream = nullptr; + { + numPrimsInStream = PoisonValue::get(m_builder.getInt32Ty()); + + for (unsigned i = 0; i < MaxGsStreams; ++i) { + if (!m_pipelineState->isVertexStreamActive(i)) + continue; + + if (numActiveStreams > 1) { + // Multiple active vertex streams, promote the values to VGPRs + numPrimsInStream = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {primCountInSubgroup[i], m_builder.getInt32(i), numPrimsInStream}); + } else { + // Single active vertex stream, keep the values in SGPR + assert(numActiveStreams == 1); + numPrimsInStream = primCountInSubgroup[i]; + break; + } + } + + validStream = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(MaxGsStreams)); + assert(activeStreamMask != 0); + Value *mask = m_builder.getIntN(waveSize, activeStreamMask); + auto activeStream = m_builder.CreateIntrinsic(m_builder.getInt1Ty(), Intrinsic::amdgcn_inverse_ballot, mask); + + validStream = m_builder.CreateAnd(validStream, activeStream); + m_builder.CreateCondBr(validStream, initPrimitivesToWriteBlock, endInitPrimitivesToWriteBlock); + } + + // Construct ".initPrimitivesToWrite" block + { + m_builder.SetInsertPoint(initPrimitivesToWriteBlock); + + auto ldsOffset = m_builder.CreateAdd(m_builder.getInt32(xfbStatsRegionStart + MaxTransformFeedbackBuffers), + m_nggInputs.threadIdInSubgroup); + writeValueToLds(numPrimsInStream, ldsOffset); + + m_builder.CreateBr(endInitPrimitivesToWriteBlock); + } + + // Construct ".endInitPrimitivesToWrite" block + Value *dwordsPerPrim = nullptr; + Value *bufferSizeInDwords = nullptr; + Value *streamOutBufOffset = nullptr; + Value *dwordsNeeded = 
nullptr; + Value *bufferToStream = nullptr; + { + m_builder.SetInsertPoint(endInitPrimitivesToWriteBlock); + + dwordsPerPrim = PoisonValue::get(m_builder.getInt32Ty()); + bufferSizeInDwords = PoisonValue::get(m_builder.getInt32Ty()); + streamOutBufOffset = PoisonValue::get(m_builder.getInt32Ty()); + Value *primitiveCount = primCountInSubgroup[0]; + bufferToStream = m_builder.getInt32(0); + + for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { + if (!bufferActive[i]) + continue; + + Value *primitiveSize = + m_builder.CreateMul(m_verticesPerPrimitive, m_builder.getInt32(xfbStrides[i] / sizeof(unsigned))); + + // NUM_RECORDS = SQ_BUF_RSRC_WORD2 + Value *numRecords = m_builder.CreateExtractElement(m_streamOutBufDescs[i], 2); + // bufferSizeInDwords = numRecords >> 2 (NOTE: NUM_RECORDS is set to the byte size of stream-out buffer) + Value *bufferSize = m_builder.CreateLShr(numRecords, 2); + + if (numActiveBuffers > 1) { + // Multiple active XFB buffers, promote the values to VGPRs for later handling + dwordsPerPrim = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {primitiveSize, m_builder.getInt32(i), dwordsPerPrim}); + + bufferSizeInDwords = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {bufferSize, m_builder.getInt32(i), bufferSizeInDwords}); + + streamOutBufOffset = + m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {m_streamOutBufOffsets[i], m_builder.getInt32(i), streamOutBufOffset}); + + if (m_hasGs) { + primitiveCount = m_builder.CreateIntrinsic( + m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {primCountInSubgroup[xfbBufferToStream[i]], m_builder.getInt32(i), primitiveCount}); + + bufferToStream = m_builder.CreateIntrinsic( + m_builder.getInt32Ty(), Intrinsic::amdgcn_writelane, + {m_builder.getInt32(xfbBufferToStream[i]), m_builder.getInt32(i), bufferToStream}); + } + } else { + // Single active XFB buffer, keep the values in SGPR + 
assert(numActiveBuffers == 1); + dwordsPerPrim = primitiveSize; + bufferSizeInDwords = bufferSize; + streamOutBufOffset = m_streamOutBufOffsets[i]; + if (m_hasGs) { + primitiveCount = primCountInSubgroup[xfbBufferToStream[i]]; + bufferToStream = m_builder.getInt32(xfbBufferToStream[i]); + } + + break; // We can exit the loop since we just handle one active XFB buffer + } + } + + dwordsNeeded = m_builder.CreateMul(dwordsPerPrim, primitiveCount); + + auto validBuffer = + m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(MaxTransformFeedbackBuffers)); + assert(activeBufferMask != 0); + Value *mask = m_builder.getIntN(waveSize, activeBufferMask); + auto activeBuffer = m_builder.CreateIntrinsic(m_builder.getInt1Ty(), Intrinsic::amdgcn_inverse_ballot, mask); + validBuffer = m_builder.CreateAnd(validBuffer, activeBuffer); + m_builder.CreateCondBr(validBuffer, waveOrderingHeaderBlock, checkUpdatePrimitiveCounterBlock); + } + + // Construct ".waveOrderingHeader" block + Value *readyOrderedWaveId = nullptr; + Value *dwordsWritten = nullptr; + { + m_builder.SetInsertPoint(waveOrderingHeaderBlock); + + const unsigned orderedIdPairStride = m_streamOutControlCbOffsets.orderedIdPair[1].orderedWaveId - + m_streamOutControlCbOffsets.orderedIdPair[0].orderedWaveId; + auto orderedIdPairOffset = + m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(orderedIdPairStride)); + orderedIdPairOffset = m_builder.CreateAdd( + m_builder.getInt32(m_streamOutControlCbOffsets.orderedIdPair[0].orderedWaveId), orderedIdPairOffset); + + auto oldOrderedIdPair = globalAtomicOrderedAdd(std::make_pair(m_nggInputs.orderedWaveId, dwordsNeeded), + m_streamOutControlBufPtr, orderedIdPairOffset); + readyOrderedWaveId = oldOrderedIdPair.first; + dwordsWritten = oldOrderedIdPair.second; + + auto needToWait = m_builder.CreateICmpNE(m_nggInputs.orderedWaveId, readyOrderedWaveId); + m_builder.CreateCondBr(needToWait, waveOrderingBodyBlock, endWaveOrderingBlock); + } + + 
// Construct ".waveOrderingBody" block + { + m_builder.SetInsertPoint(waveOrderingBodyBlock); + + // NOTE: We use such rules to derive a variable amount of wait time based on the difference between orderedWaveId + // and readyOrderedWaveId: + // + // - If the difference value is only 1, we wait at most ~1/2 L2 latency time (~128 clocks). + // - For every additional unit of the difference value, we add ~2 round trip times (~512 clocks) to the sleep + // duration. + // + // The formula is therefore as follows (the unit of the sleep duration of s_sleep is 64 clocks): + // waitTime = 8 * (orderedWaveId - readyOrderedWaveId - 1) + 2 + auto waitTime = m_builder.CreateSub(m_nggInputs.orderedWaveId, readyOrderedWaveId); + waitTime = m_builder.CreateSub(waitTime, m_builder.getInt32(1)); + waitTime = m_builder.CreateMul(waitTime, m_builder.getInt32(8)); + waitTime = m_builder.CreateAdd(waitTime, m_builder.getInt32(2)); + m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_sleep_var, {}, waitTime); + + m_builder.CreateBr(waveOrderingHeaderBlock); + } + + // Construct ".endWaveOrdering" block + { + m_builder.SetInsertPoint(endWaveOrderingBlock); + + // dwordsRemaining = max(0, bufferSizeInDwords - (bufferOffset + dwordsWritten)) + Value *dwordsRemaining = + m_builder.CreateSub(bufferSizeInDwords, m_builder.CreateAdd(streamOutBufOffset, dwordsWritten)); + dwordsRemaining = m_builder.CreateIntrinsic(Intrinsic::smax, dwordsRemaining->getType(), + {dwordsRemaining, m_builder.getInt32(0)}); + // primsCanWrite = dwordsRemaining / dwordsPerPrim + Value *primsCanWrite = m_builder.CreateUDiv(dwordsRemaining, dwordsPerPrim); + // numPrimsToWrite = ds_min(primsCanWrite, numPrimsToWrite) + auto ldsOffset = + m_builder.CreateAdd(m_builder.getInt32(xfbStatsRegionStart + MaxTransformFeedbackBuffers), bufferToStream); + atomicOp(AtomicRMWInst::UMin, primsCanWrite, ldsOffset); + auto numPrimsToWrite = + readValueFromLds(m_builder.getInt32Ty(), ldsOffset); // Read back the final result of numPrimsToWrite
+ + ldsOffset = m_builder.CreateAdd(m_builder.getInt32(xfbStatsRegionStart), m_nggInputs.threadIdInSubgroup); + writeValueToLds(dwordsWritten, ldsOffset); + + // dwordsToWrite = numPrimsToWrite * dwordsPerPrim + auto dwordsToWrite = m_builder.CreateMul(numPrimsToWrite, dwordsPerPrim); + // dwordsWrittenDelta = dwordsNeeded - dwordsToWrite + auto dwordsWrittenDelta = m_builder.CreateSub(dwordsNeeded, dwordsToWrite); + + // Revise dwordsWritten[X] + const unsigned dwordsWrittenStride = m_streamOutControlCbOffsets.orderedIdPair[1].dwordsWritten - + m_streamOutControlCbOffsets.orderedIdPair[0].dwordsWritten; + auto dwordsWrittenOffset = + m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(dwordsWrittenStride)); + dwordsWrittenOffset = m_builder.CreateAdd( + m_builder.getInt32(m_streamOutControlCbOffsets.orderedIdPair[0].dwordsWritten), dwordsWrittenOffset); + + globalAtomicOp(AtomicRMWInst::BinOp::Sub, dwordsWrittenDelta, m_streamOutControlBufPtr, dwordsWrittenOffset); + + m_builder.CreateBr(checkUpdatePrimitiveCounterBlock); + } + + // Construct ".checkUpdatePrimitiveCounter" block + { + m_builder.SetInsertPoint(checkUpdatePrimitiveCounterBlock); + + m_builder.CreateCondBr(validStream, updatePrimitiveCounterBlock, endUpdatePrimitiveCounterBlock); + } + + // Construct ".updatePrimitiveCounter" block + { + m_builder.SetInsertPoint(updatePrimitiveCounterBlock); + + // Update the counters primsNeeded[X] and primsWritten[X] + const unsigned primsNeededStride = + m_streamOutControlCbOffsets.primsNeeded[1] - m_streamOutControlCbOffsets.primsNeeded[0]; + auto primsNeededOffset = m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(primsNeededStride)); + primsNeededOffset = + m_builder.CreateAdd(m_builder.getInt32(m_streamOutControlCbOffsets.primsNeeded[0]), primsNeededOffset); + + globalAtomicOp(AtomicRMWInst::Add, m_builder.CreateZExt(numPrimsInStream, m_builder.getInt64Ty()), + m_streamOutControlBufPtr, primsNeededOffset); + + auto
ldsOffset = m_builder.CreateAdd(m_builder.getInt32(xfbStatsRegionStart + MaxTransformFeedbackBuffers), + m_nggInputs.threadIdInSubgroup); + Value *numPrimsToWrite = readValueFromLds(m_builder.getInt32Ty(), ldsOffset); + + const unsigned primsWrittenStride = + m_streamOutControlCbOffsets.primsWritten[1] - m_streamOutControlCbOffsets.primsWritten[0]; + auto primsWrittenOffset = + m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(primsWrittenStride)); + primsWrittenOffset = + m_builder.CreateAdd(m_builder.getInt32(m_streamOutControlCbOffsets.primsWritten[0]), primsWrittenOffset); + + globalAtomicOp(AtomicRMWInst::Add, m_builder.CreateZExt(numPrimsToWrite, m_builder.getInt64Ty()), + m_streamOutControlBufPtr, primsWrittenOffset); + + m_builder.CreateBr(endUpdatePrimitiveCounterBlock); + } + + // Construct ".endUpdatePrimitiveCounter" block + { + m_builder.SetInsertPoint(endUpdatePrimitiveCounterBlock); + // Nothing to do + } +#else llvm_unreachable("Not implemented!"); +#endif } // ===================================================================================================================== @@ -7667,7 +8234,16 @@ void NggPrimShader::collectPrimitiveStats() { {m_builder.getInt32(0), // value to add m_builder.getInt32(GDS_STRMOUT_PRIMS_WRITTEN_0 << 2)}); // count index } else { +#if LLPC_BUILD_GFX12 + globalAtomicOp(AtomicRMWInst::Add, + m_builder.CreateZExt(m_nggInputs.primCountInSubgroup, m_builder.getInt64Ty()), + m_streamOutControlBufPtr, m_builder.getInt32(m_streamOutControlCbOffsets.primsNeeded[0])); + + globalAtomicOp(AtomicRMWInst::Add, m_builder.getInt64(0), m_streamOutControlBufPtr, + m_builder.getInt32(m_streamOutControlCbOffsets.primsWritten[0])); +#else llvm_unreachable("Not implemented!"); +#endif } m_builder.CreateBr(endCollectPrimitiveStatsBlock); @@ -7848,7 +8424,15 @@ void NggPrimShader::collectPrimitiveStats() { {m_builder.getInt32(0), // value to add m_builder.getInt32((GDS_STRMOUT_PRIMS_WRITTEN_0 + 2 * i) << 2)}); // count 
index } else { +#if LLPC_BUILD_GFX12 + globalAtomicOp(AtomicRMWInst::Add, m_builder.CreateZExt(primCountInSubgroup[i], m_builder.getInt64Ty()), + m_streamOutControlBufPtr, m_builder.getInt32(m_streamOutControlCbOffsets.primsNeeded[i])); + + globalAtomicOp(AtomicRMWInst::Add, m_builder.getInt64(0), m_streamOutControlBufPtr, + m_builder.getInt32(m_streamOutControlCbOffsets.primsWritten[i])); +#else llvm_unreachable("Not implemented!"); +#endif } } @@ -8081,6 +8665,14 @@ void NggPrimShader::createFenceAndBarrier() { // ===================================================================================================================== // Create LDS barrier to guarantee the synchronization of LDS operations. void NggPrimShader::createBarrier() { +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_signal, {}, m_builder.getInt32(WorkgroupNormalBarrierId)); + m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_wait, {}, + m_builder.getInt16(static_cast(WorkgroupNormalBarrierId))); + return; + } +#endif m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {}); } @@ -8171,4 +8763,64 @@ llvm::Value *NggPrimShader::readValueFromCb(Type *readyTy, Value *bufPtr, Value return loadValue; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Do global atomic operation with the value stored in the specified pointer +// +// @param atomicOp : Atomic operation +// @param value : Value to do global atomic operation +// @param basePtr : Base pointer +// @param offset : Dword offset from the base pointer +// @returns : Result value after doing global atomic operation +Value *NggPrimShader::globalAtomicOp(AtomicRMWInst::BinOp atomicOp, Value *value, Value *basePtr, Value *offset) { + assert(basePtr->getType()->isPointerTy()); + assert(value->getType() == m_builder.getInt32Ty() || + 
value->getType() == m_builder.getInt64Ty()); // Must be i32 or i64 + + // Cast the address space to global + if (basePtr->getType()->getPointerAddressSpace() != ADDR_SPACE_GLOBAL) + basePtr = m_builder.CreateAddrSpaceCast(basePtr, PointerType::get(m_builder.getContext(), ADDR_SPACE_GLOBAL)); + + auto entryPtr = m_builder.CreateGEP(m_builder.getInt32Ty(), basePtr, offset); + if (value->getType() == m_builder.getInt64Ty()) + entryPtr = m_builder.CreateBitCast(entryPtr, PointerType::get(m_builder.getInt64Ty(), ADDR_SPACE_GLOBAL)); + + return m_builder.CreateAtomicRMW(atomicOp, entryPtr, value, MaybeAlign(), AtomicOrdering::Monotonic, + m_builder.getContext().getOrInsertSyncScopeID("agent")); +} + +// ===================================================================================================================== +// Do global ordered atomic add with the value stored in the specified pointer +// +// @param orderedIdPair : Ordered ID pair to do global ordered atomic add +// @param basePtr : Base pointer +// @param offset : Dword offset from the base pointer +// @returns : Old ordered ID pair before doing global ordered atomic add +std::pair NggPrimShader::globalAtomicOrderedAdd(std::pair orderedIdPair, + Value *basePtr, Value *offset) { + assert(basePtr->getType()->isPointerTy()); + assert(orderedIdPair.first->getType() == m_builder.getInt32Ty()); + assert(orderedIdPair.second->getType() == m_builder.getInt32Ty()); + + auto int32x2Ty = FixedVectorType::get(m_builder.getInt32Ty(), 2); + Value *value = PoisonValue::get(int32x2Ty); + value = m_builder.CreateInsertElement(value, orderedIdPair.first, static_cast(0)); + value = m_builder.CreateInsertElement(value, orderedIdPair.second, 1); + value = m_builder.CreateBitCast(value, m_builder.getInt64Ty()); + + // Cast the address space to global + if (basePtr->getType()->getPointerAddressSpace() != ADDR_SPACE_GLOBAL) + basePtr = m_builder.CreateAddrSpaceCast(basePtr, PointerType::get(m_builder.getContext(), 
ADDR_SPACE_GLOBAL)); + + auto entryPtr = m_builder.CreateGEP(m_builder.getInt32Ty(), basePtr, offset); + entryPtr = m_builder.CreateBitCast(entryPtr, PointerType::get(m_builder.getInt64Ty(), ADDR_SPACE_GLOBAL)); + Value *oldOrderedIdPair = m_builder.CreateIntrinsic( + m_builder.getInt64Ty(), Intrinsic::amdgcn_global_atomic_ordered_add_b64, {entryPtr, value}); + oldOrderedIdPair = m_builder.CreateBitCast(oldOrderedIdPair, int32x2Ty); + + return std::make_pair(m_builder.CreateExtractElement(oldOrderedIdPair, static_cast(0)), + m_builder.CreateExtractElement(oldOrderedIdPair, 1)); +} +#endif + } // namespace lgc diff --git a/lgc/lowering/NggPrimShader.h b/lgc/lowering/NggPrimShader.h index 6b70653a2c..d29c49cf53 100644 --- a/lgc/lowering/NggPrimShader.h +++ b/lgc/lowering/NggPrimShader.h @@ -200,6 +200,15 @@ typedef std::unordered_map> P // NOTE: The layout structure is defined by @ref Util::Abi::StreamOutControlCb. struct StreamOutControlCbOffsets { unsigned bufOffsets[MaxTransformFeedbackBuffers]; +#if LLPC_BUILD_GFX12 + // Following offsets are only available on GFX12+ (caused by GDS removal) + unsigned primsNeeded[MaxGsStreams]; + unsigned primsWritten[MaxGsStreams]; + struct { + unsigned orderedWaveId; + unsigned dwordsWritten; + } orderedIdPair[MaxTransformFeedbackBuffers]; +#endif }; // ===================================================================================================================== @@ -349,6 +358,12 @@ class NggPrimShader { void writeValueToLds(llvm::Value *writeValue, llvm::Value *ldsOffset, bool useDs128 = false); void atomicOp(llvm::AtomicRMWInst::BinOp atomicOp, llvm::Value *value, llvm::Value *ldsOffset); llvm::Value *readValueFromCb(llvm::Type *readyTy, llvm::Value *bufPtr, llvm::Value *offset, bool isVolatile = false); +#if LLPC_BUILD_GFX12 + llvm::Value *globalAtomicOp(llvm::AtomicRMWInst::BinOp atomicOp, llvm::Value *value, llvm::Value *basePtr, + llvm::Value *offset); + std::pair globalAtomicOrderedAdd(std::pair orderedIdPair, + 
llvm::Value *basePtr, llvm::Value *offset); +#endif static const unsigned NullPrim = (1u << 31); // Null primitive data (invalid) diff --git a/lgc/lowering/PassRegistry.inc b/lgc/lowering/PassRegistry.inc index 862ac9735e..48edbbf3e0 100644 --- a/lgc/lowering/PassRegistry.inc +++ b/lgc/lowering/PassRegistry.inc @@ -78,6 +78,9 @@ LLPC_FUNCTION_PASS("lgc-lower-mul-dx9-zero", LowerMulDx9Zero) LLPC_MODULE_PASS("lgc-generate-null-frag-shader", GenerateNullFragmentShader) LLPC_MODULE_PASS("lgc-passthrough-hull-shader", PassthroughHullShader) LLPC_MODULE_PASS("lgc-collect-image-operations", CollectImageOperations) +#if LLPC_BUILD_GFX12 +LLPC_FUNCTION_PASS("lgc-add-buffer-operations-metadata", AddBufferOperationMetadata) +#endif LLPC_MODULE_PASS("lgc-vertex-fetch", LowerVertexFetch) LLPC_MODULE_PASS("lgc-frag-color-export", LowerFragmentColorExport) LLPC_MODULE_PASS("lgc-lower-debug-printf", LowerDebugPrintf) diff --git a/lgc/lowering/PreparePipelineAbi.cpp b/lgc/lowering/PreparePipelineAbi.cpp index 25a323e4d4..72b9cb0fae 100644 --- a/lgc/lowering/PreparePipelineAbi.cpp +++ b/lgc/lowering/PreparePipelineAbi.cpp @@ -208,6 +208,11 @@ void PreparePipelineAbi::writeTessFactors(PipelineState *pipelineState, Value *t CoherentFlag coherent = {}; if (pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { coherent.bits.glc = true; +#if LLPC_BUILD_GFX12 + } else { + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_SYS; + coherent.gfx12.th = pipelineState->getTemporalHint(TH::TH_WB, TemporalHintTessFactorWrite); +#endif } auto primitiveMode = pipelineState->getShaderModes()->getTessellationMode().primitiveMode; @@ -324,6 +329,11 @@ void PreparePipelineAbi::writeHsOutputs(PipelineState *pipelineState, Value *off CoherentFlag coherent = {}; if (gfxIp.major <= 11) { coherent.bits.glc = true; +#if LLPC_BUILD_GFX12 + } else { + coherent.gfx12.th = TH::TH_WB; + coherent.gfx12.scope = MemoryScope::MEMORY_SCOPE_DEV; +#endif } 
LLPC_OUTS("===============================================================================\n"); diff --git a/lgc/lowering/RegisterMetadataBuilder.cpp b/lgc/lowering/RegisterMetadataBuilder.cpp index 0e98d11ce4..7f34067db4 100644 --- a/lgc/lowering/RegisterMetadataBuilder.cpp +++ b/lgc/lowering/RegisterMetadataBuilder.cpp @@ -210,7 +210,12 @@ void RegisterMetadataBuilder::buildLsHsRegisters() { else lsVgprCompCnt = 1; // Must enable relative vertex ID (LS VGPR2 and VGPR3) } else { +#if LLPC_BUILD_GFX12 + if (m_hasVs && vsBuiltInUsage.instanceIndex) + lsVgprCompCnt = 1; // Enable instance ID (LS VGPR3) +#else llvm_unreachable("Not implemented!"); +#endif } getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::LsVgprCompCnt] = lsVgprCompCnt; @@ -414,7 +419,21 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) } } else { +#if LLPC_BUILD_GFX12 + if (m_hasGs) { + if (hwConfig.inputVertices > 3 && geometryMode.inputPrimitive != InputPrimitives::Patch) + gsVgprCompCnt = 2; // Enable primitive connectivity data for adjacency (GS VGPR2) + else if (gsBuiltInUsage.primitiveIdIn) + gsVgprCompCnt = 1; // Enable primitive ID (GS VGPR1) + } else if (m_hasVs) { + // NOTE: When GS is absent, only those VGPRs are required: primitive connectivity data, + // primitive ID (only for VS). 
+ if (!hasTs && vsBuiltInUsage.primitiveId) + gsVgprCompCnt = 1; // Enable primitive ID (GS VGPR1) + } +#else llvm_unreachable("Not implemented!"); +#endif } getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::GsVgprCompCnt] = gsVgprCompCnt; @@ -431,7 +450,19 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { esVgprCompCnt = 3; // Enable instance ID (ES VGPR8) } } else { +#if LLPC_BUILD_GFX12 + if (hasTs) { + if (tesBuiltInUsage.primitiveId) + esVgprCompCnt = 3; // Enable patch ID (ES VGPR6) + else + esVgprCompCnt = 2; // Must enable relative patch ID (ES VGPR5) + } else if (m_hasVs) { + if (vsBuiltInUsage.instanceIndex) + esVgprCompCnt = 1; // Enable instance ID (ES VGPR4) + } +#else llvm_unreachable("Not implemented!"); +#endif } getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::EsVgprCompCnt] = esVgprCompCnt; @@ -619,7 +650,12 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { if (!nggControl->passthroughMode) { // If the NGG culling data buffer is not already specified by a hardware stage's user_data_reg_map, then this // field specified the register offset that is expected to point to the low 32-bits of address to the buffer. +#if LLPC_BUILD_GFX12 + getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::NggCullingDataReg] = + m_gfxIp.major >= 12 ? 
mmSPI_SHADER_PGM_LO_GS_GFX12 : mmSPI_SHADER_PGM_LO_GS; +#else getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::NggCullingDataReg] = mmSPI_SHADER_PGM_LO_GS; +#endif } } @@ -781,6 +817,10 @@ void RegisterMetadataBuilder::buildPsRegisters() { m_pipelineState->getShaderResourceUsage(shaderStage)->builtInUsage.fs.runAtSampleRate > 0; bool allowRez = shaderOptions.allowReZ; +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx12.waNoReZSupport) + allowRez = false; +#endif // DB_SHADER_CONTROL ZOrder zOrder = LATE_Z; bool execOnHeirFail = false; @@ -1112,6 +1152,10 @@ void RegisterMetadataBuilder::buildShaderExecutionRegisters(Util::Abi::HardwareS const auto &shaderOptions = m_pipelineState->getShaderOptions(apiStage); hwShaderNode[Util::Abi::HardwareStageMetadataKey::DebugMode] = shaderOptions.debugMode; hwShaderNode[Util::Abi::HardwareStageMetadataKey::TrapPresent] = shaderOptions.trapPresent; +#if LLPC_BUILD_GFX12 + if (m_gfxIp.major >= 12) + hwShaderNode[Util::Abi::HardwareStageMetadataKey::WorkgroupRoundRobin] = shaderOptions.workgroupRoundRobin; +#endif } hwShaderNode[Util::Abi::HardwareStageMetadataKey::UserSgprs] = userDataCount; diff --git a/lgc/lowering/SetupTargetFeatures.cpp b/lgc/lowering/SetupTargetFeatures.cpp index f15d45147e..bad97d0aa9 100644 --- a/lgc/lowering/SetupTargetFeatures.cpp +++ b/lgc/lowering/SetupTargetFeatures.cpp @@ -156,6 +156,26 @@ void SetUpTargetFeatures::setupTargetFeatures(Module *module) { auto gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); +#if LLPC_BUILD_GFX12 + if (gfxIp.major >= 12) { + if (m_pipelineState->getOptions().expertSchedulingMode) + builder.addAttribute("amdgpu-expert-scheduling", "true"); + + if (m_pipelineState->getOptions().disableDynamicVgpr || + m_pipelineState->getOptions().rtIndirectMode <= RayTracingIndirectMode::Legacy) { + targetFeatures += ",-dynamic-vgpr"; + } else { + targetFeatures += ",+dynamic-vgpr"; + + // Set the dVGPR block size, unless it's 
unspecified or equal to LLVM's + // default value. + auto blockSize = m_pipelineState->getOptions().dynamicVgprBlockSize; + if (blockSize != 0 && blockSize != 16) + targetFeatures += ",+dynamic-vgpr-block-size-" + std::to_string(blockSize); + } + } +#endif + // NOTE: The sub-attribute 'wavefrontsize' of 'target-features' is set in advance to let optimization // pass know we are in which wavesize mode. Here, we read back it and append it to finalized target // feature strings. diff --git a/lgc/lowering/ShaderInputs.cpp b/lgc/lowering/ShaderInputs.cpp index 63de04d59d..38b6868d30 100644 --- a/lgc/lowering/ShaderInputs.cpp +++ b/lgc/lowering/ShaderInputs.cpp @@ -35,6 +35,9 @@ #include "lgc/state/ResourceUsage.h" #include "lgc/state/TargetInfo.h" #include "lgc/util/BuilderBase.h" +#if LLPC_BUILD_GFX12 +#include "llvm/IR/IntrinsicsAMDGPU.h" +#endif using namespace lgc; using namespace llvm; @@ -301,6 +304,10 @@ const char *ShaderInputs::getInputName(ShaderInput inputKind) { return "FixedXY"; case ShaderInput::LocalInvocationId: return "LocalInvocationId"; +#if LLPC_BUILD_GFX12 + case ShaderInput::CsWaveId: + return "CsWaveId"; +#endif default: llvm_unreachable("Unknown shader input kind"); } @@ -359,6 +366,22 @@ void ShaderInputs::fixupUses(Module &module, PipelineState *pipelineState, bool continue; Value *value = nullptr; +#if LLPC_BUILD_GFX12 + GfxIpVersion gfxIp = pipelineState->getTargetInfo().getGfxIpVersion(); + if (gfxIp.major >= 12 && kind == static_cast<unsigned>(ShaderInput::WorkgroupId)) { + auto &entryBlock = func.getEntryBlock(); + BuilderBase builder(&entryBlock.front()); + value = PoisonValue::get(FixedVectorType::get(builder.getInt32Ty(), 3)); + unsigned intrinsics[3] = {Intrinsic::amdgcn_workgroup_id_x, Intrinsic::amdgcn_workgroup_id_y, + Intrinsic::amdgcn_workgroup_id_z}; + for (auto [i, id] : enumerate(intrinsics)) + value = builder.CreateInsertElement(value, builder.CreateIntrinsic(id, {}, {}), i); + } else if (gfxIp.major >= 12 && kind == 
static_cast<unsigned>(ShaderInput::CsWaveId)) { + auto &entryBlock = func.getEntryBlock(); + BuilderBase builder(&entryBlock.front()); + value = builder.CreateIntrinsic(Intrinsic::amdgcn_wave_id, {}, {}); + } else +#endif { if (inputUsage->entryArgIdx != 0) value = getFunctionArgument(&func, inputUsage->entryArgIdx); @@ -706,6 +729,11 @@ uint64_t ShaderInputs::getShaderArgTys(PipelineState *pipelineState, ShaderStage ShaderInputsUsage *inputsUsage = getShaderInputsUsage(shaderStage); assert(inputDesc.inputKind < ShaderInput::Count); ShaderInputUsage *inputUsage = inputsUsage->inputs[static_cast<unsigned>(inputDesc.inputKind)].get(); +#if LLPC_BUILD_GFX12 + if (pipelineState->getTargetInfo().getGfxIpVersion().major >= 12 && + (inputDesc.inputKind == ShaderInput::WorkgroupId || inputDesc.inputKind == ShaderInput::MultiDispatchInfo)) + continue; +#endif // We don't want this input if it is not marked "always" and it is not used. if (!inputDesc.always && (!inputUsage || inputUsage->users.empty())) continue; diff --git a/lgc/lowering/ShaderMerger.cpp b/lgc/lowering/ShaderMerger.cpp index 561af503ff..6b0b122b15 100644 --- a/lgc/lowering/ShaderMerger.cpp +++ b/lgc/lowering/ShaderMerger.cpp @@ -272,6 +272,9 @@ FunctionType *ShaderMerger::generateLsHsEntryPointType(uint64_t *inRegMask) cons // LS VGPRs argTys.push_back(Type::getInt32Ty(*m_context)); // Vertex ID if (m_gfxIp.major <= 11) { +#if LLPC_BUILD_GFX12 + // NOTE: GFX12 removes those two LS VGPRs. 
+#endif argTys.push_back(Type::getInt32Ty(*m_context)); // Relative vertex ID (auto index) argTys.push_back(Type::getInt32Ty(*m_context)); // Unused } @@ -407,7 +410,16 @@ Function *ShaderMerger::generateLsHsEntryPoint(Function *lsEntryPoint, Function relVertexId = vgprArgs[3]; instanceId = vgprArgs[5]; } else { +#if LLPC_BUILD_GFX12 + Value *waveIdInGroup = getFunctionArgument(entryPoint, getSpecialSgprInputIndex(m_gfxIp, LsHs::waveIdInGroup)); + waveIdInGroup = builder.CreateAnd(waveIdInGroup, 0x1F, "waveIdInGroup"); // waveIdInGroup = [4:0] + + relVertexId = builder.CreateMul(builder.getInt32(waveSize), waveIdInGroup); + relVertexId = builder.CreateAdd(relVertexId, threadIdInWave); + instanceId = vgprArgs[3]; +#else llvm_unreachable("Not implemented!"); +#endif } // Vertex fetch VGPRs @@ -1426,6 +1438,14 @@ void ShaderMerger::writeValueToLds(Value *writeValue, Value *ldsOffset, IRBuilde // // @param builder : IR builder to insert instructions void ShaderMerger::createBarrier(IRBuilder<> &builder) { +#if LLPC_BUILD_GFX12 + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 12) { + builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_signal, {}, builder.getInt32(WorkgroupNormalBarrierId)); + builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier_wait, {}, + builder.getInt16(static_cast<uint16_t>(WorkgroupNormalBarrierId))); + return; + } +#endif builder.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {}); } diff --git a/lgc/lowering/VertexFetch.cpp b/lgc/lowering/VertexFetch.cpp index 367a3412cd..fcebd5ee48 100644 --- a/lgc/lowering/VertexFetch.cpp +++ b/lgc/lowering/VertexFetch.cpp @@ -158,7 +158,11 @@ class VertexFetchImpl : public VertexFetch { static const VertexCompFormatInfo m_vertexCompFormatInfo[]; // Info table of vertex component format static const VertexNumFormatInfo m_vertexNumFormatInfo[]; // Info table of vertex num format static const unsigned char m_vertexFormatMapGfx10[][9]; // Info table of vertex format mapping for GFX10 - static const unsigned 
char m_vertexFormatMapGfx11[][9]; // Info table of vertex format mapping for GFX11 +#if LLPC_BUILD_GFX12 + static const unsigned char m_vertexFormatMapGfx11[][9]; // Info table of vertex format mapping for GFX11~12 +#else + static const unsigned char m_vertexFormatMapGfx11[][9]; // Info table of vertex format mapping for GFX11 +#endif // Default values for vertex fetch (<4 x i32> or <8 x i32>) struct { @@ -1535,6 +1539,9 @@ unsigned VertexFetchImpl::mapVertexFormat(unsigned dfmt, unsigned nfmt) const { format = m_vertexFormatMapGfx10[dfmt][nfmt]; break; case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif assert(dfmt < sizeof(m_vertexFormatMapGfx11) / sizeof(m_vertexFormatMapGfx11[0])); assert(nfmt < sizeof(m_vertexFormatMapGfx11[0]) / sizeof(m_vertexFormatMapGfx11[0][0])); format = m_vertexFormatMapGfx11[dfmt][nfmt]; diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index 7d1e9172ca..79424109f8 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -1065,6 +1065,31 @@ const ResourceNode *PipelineState::findSingleRootResourceNode(ResourceNodeType n return nullptr; } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Get the temporal hint. +// +// @param th : origin temporal hint, if it is not tuning, result is the same as th. 
+// @param opType : Operation type +// @param stage : Shader stage +unsigned PipelineState::getTemporalHint(unsigned th, TemporalHintOpType opType, ShaderStageEnum stage) { + if (stage != ShaderStageEnum::Invalid) { + // per-shader settings + unsigned temporalhint = getShaderOptions(stage).temporalHintShaderControl >> opType & 0xF; + if (temporalhint >= 8) { + return (temporalhint - 8); + } + } + + // global settings + unsigned temporalhint = m_options.temporalHintControl >> opType & 0xF; + if (temporalhint >= 8) { + return (temporalhint - 8); + } + return th; +} +#endif + // ===================================================================================================================== // Get the cached MDString for the name of a resource mapping node type, as used in IR metadata for user data nodes. // @@ -1744,6 +1769,9 @@ bool PipelineState::exportAttributeByExportInstruction() const { case 10: return true; // Always use parameter export instruction case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif return false; // Always use attribute-through-memory (ATM) default: llvm_unreachable("Unexpected GFX generation!"); diff --git a/lgc/state/RayTracingLibrarySummary.cpp b/lgc/state/RayTracingLibrarySummary.cpp index 2a6c45aa9a..27b1d1f8f3 100644 --- a/lgc/state/RayTracingLibrarySummary.cpp +++ b/lgc/state/RayTracingLibrarySummary.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -48,6 +48,9 @@ static constexpr char MaxRayPayloadSize[] = "max_ray_payload_size"; static constexpr char MaxHitAttributeSize[] = "max_hit_attribute_size"; static constexpr char HasKernelEntry[] = "has_kernel_entry"; static constexpr char HasTraceRayModule[] = "has_trace_ray_module"; +#if LLPC_BUILD_GFX12 +static constexpr char MaxOutgoingVgprCount[] = "max_outgoing_vgpr_count"; +#endif static constexpr char LlvmRaytracingState[] = "llvm_raytracing_state"; } // namespace RtLibSummary @@ -83,6 +86,9 @@ Expected RayTracingLibrarySummary::decodeMsgpack(Strin getUInt(root[RtLibSummary::MaxHitAttributeSize], rls.maxHitAttributeSize); getBool(root[RtLibSummary::HasKernelEntry], rls.hasKernelEntry); getBool(root[RtLibSummary::HasTraceRayModule], rls.hasTraceRayModule); +#if LLPC_BUILD_GFX12 + getUInt(root[RtLibSummary::MaxOutgoingVgprCount], rls.maxOutgoingVgprCount); +#endif auto errorOrState = llvmraytracing::PipelineState::decodeMsgpack(root[RtLibSummary::LlvmRaytracingState]); if (auto error = errorOrState.takeError()) return error; @@ -105,6 +111,9 @@ std::string RayTracingLibrarySummary::encodeMsgpack() const { root[RtLibSummary::MaxHitAttributeSize] = maxHitAttributeSize; root[RtLibSummary::HasKernelEntry] = hasKernelEntry; root[RtLibSummary::HasTraceRayModule] = hasTraceRayModule; +#if LLPC_BUILD_GFX12 + root[RtLibSummary::MaxOutgoingVgprCount] = maxOutgoingVgprCount; +#endif llvmRaytracingState.encodeMsgpack(root[RtLibSummary::LlvmRaytracingState]); std::string out; @@ -120,6 +129,9 @@ void RayTracingLibrarySummary::merge(const RayTracingLibrarySummary &other) { } maxRayPayloadSize = std::max(maxRayPayloadSize, other.maxRayPayloadSize); maxHitAttributeSize = std::max(maxHitAttributeSize, other.maxHitAttributeSize); +#if LLPC_BUILD_GFX12 + maxOutgoingVgprCount = std::max(maxOutgoingVgprCount, 
other.maxOutgoingVgprCount); +#endif // TODO: Inherit kernel entry and trace ray module if possible and avoid recompile? hasKernelEntry = false; diff --git a/lgc/state/TargetInfo.cpp b/lgc/state/TargetInfo.cpp index 560415c647..aacdc795f3 100644 --- a/lgc/state/TargetInfo.cpp +++ b/lgc/state/TargetInfo.cpp @@ -387,6 +387,28 @@ static void setGfx1151Info(TargetInfo *targetInfo) { } #endif +#if LLPC_BUILD_GFX12 +// gfx12 +// +// @param [in/out] targetInfo : Target info +static void setGfx12Info(TargetInfo *targetInfo) { + setGfx11Info(targetInfo); +} +#endif + +#if LLPC_BUILD_NAVI48 +// gfx1201 +// +// @param [in/out] targetInfo : Target info +static void setGfx1201Info(TargetInfo *targetInfo) { + setGfx12Info(targetInfo); + + targetInfo->getGpuProperty().numShaderEngines = 4; + targetInfo->getGpuProperty().numComputeUnitsPerShaderEngine = 8; + targetInfo->getGpuWorkarounds().gfx12.waNoReZSupport = 1; +} +#endif + // Represents device infos. struct GpuNameStringMap { const char *gpuName; @@ -416,6 +438,9 @@ static const GpuNameStringMap GpuNameMap[] = { #if LLPC_BUILD_STRIX_HALO {"gfx1151", "Strix_halo", &setGfx1151Info}, // gfx1151 #endif +#if LLPC_BUILD_NAVI48 + {"gfx1201", "Navi48", &setGfx1201Info}, // gfx1201 +#endif }; // ===================================================================================================================== diff --git a/lgc/test/Transforms/CpsLoweringWithDvgpr/continuation-basic.lgc b/lgc/test/Transforms/CpsLoweringWithDvgpr/continuation-basic.lgc new file mode 100644 index 0000000000..f9afcabb9c --- /dev/null +++ b/lgc/test/Transforms/CpsLoweringWithDvgpr/continuation-basic.lgc @@ -0,0 +1,164 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 +; RUN: lgc -mcpu=gfx1201 -o - -lgc-use-init-whole-wave -passes="require<lgc-pipeline-state>,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 + +declare void @lgc.cps.jump(...) 
#0 + +define void @test(i32 %cspInit, i32 %arg, ptr %table) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] !continuation.state [[META8:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TABLE]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast i64 [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP38]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP38]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 10: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 +; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 +; CHECK-NEXT: [[THEN_ARG:%.*]] = add 
i32 [[ARG]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_THEN]], [[TMP10]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY]] ], [ [[TMP6]], [[TMP10]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP7]], [[ENTRY]] ], [ [[THEN_ARG]], [[TMP10]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP19]], i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP20]], i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP21]], i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP22]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP23]], i32 [[TMP16]], 5 +; CHECK-NEXT: [[TMP77:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32 } [[TMP24]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP77]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP25]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP29]], i32 [[TMP27]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP25]], 2 +; CHECK-NEXT: [[TMP33:%.*]] = call i32 
@llvm.amdgcn.ballot.i32(i1 [[TMP32]]) +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP33]], i32 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP25]], 1 +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP37]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP37]], i32 [[TMP35]] +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP40]], i1 true) +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP77]], i32 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP77]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], -64 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP45]], i64 0 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i32> [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP42]], 56 +; CHECK-NEXT: [[TMP50:%.*]] = shl i32 [[TMP49]], 1 +; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], 16 +; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP51]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP54:%.*]] = bitcast i64 [[TMP53]] to <2 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i32> [[TMP54]], i64 0 +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[TMP54]], i64 1 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP55]], i64 1 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[TMP56]], i64 2 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> 
[[TMP60]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP45]], i64 17 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP44]], i64 18 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP52]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32s(ptr inreg [[TMP48]], i32 inreg [[TMP44]], <20 x i32> inreg [[TMP76]], { i32, i32, i32, i32, i32, i32 } [[TMP24]], i32 1, i32 [[TMP52]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +entry: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + %0 = load i32, ptr %csp, align 4 + call void (...) 
@lgc.cps.jump(i32 %cr.then, i32 2, i32 %0, i32 poison, i32 %then.arg) + unreachable +} + +declare !continuation !3 { ptr, ptr } @continuation.prototype.test(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #1 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #1 + +attributes #0 = { noreturn } +attributes #1 = { nounwind } + +!continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!5} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{ptr @test} +!4 = !{i32 0} +!5 = !{i32 4} +;. +; CHECK: [[META5]] = !{i32 1} +; CHECK: [[META6]] = !{i32 7} +; CHECK: [[META7]] = !{ptr @test} +; CHECK: [[META8]] = !{i32 0} +;. diff --git a/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-entry-point.lgc b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-entry-point.lgc new file mode 100644 index 0000000000..67dabe6fdd --- /dev/null +++ b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-entry-point.lgc @@ -0,0 +1,150 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 +; RUN: lgc -mcpu=gfx1201 -o - -passes='require<lgc-pipeline-state>,lgc-mutate-entry-point' %s | FileCheck --check-prefixes=CHECK %s + +declare void @lgc.cps.jump(...) #0 + +declare void @lgc.cps.set.vsp(ptr addrspace(32)) #1 + +declare ptr addrspace(32) @lgc.cps.get.vsp() #2 + +define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc.shaderstage !3 !lgc.rt.shaderstage !3 !lgc.cps.maxOutgoingVgprCount !4{ +; CHECK-LABEL: define dllexport amdgpu_cs void @lgc.shader.CS.main( +; CHECK-SAME: i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) +; CHECK-SAME: #[[ATTR3:[0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !lgc.rt.shaderstage [[META6]] !lgc.cps.maxOutgoingVgprCount [[META7:![0-9]+]] { +; CHECK-NEXT: .entry: +; CHECK-NEXT: 
[[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[USERDATA0]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[USERDATA1]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[USERDATA2]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[USERDATA3]], i64 3 +; CHECK-NEXT: [[PTR:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP8]], i1 false) +; CHECK-NEXT: [[P0:%.*]] = getelementptr i32, ptr addrspace(7) [[PTR]], i32 0 +; CHECK-NEXT: [[I_VSP:%.*]] = load i32, ptr addrspace(7) [[P0]], align 4 +; CHECK-NEXT: store i32 [[I_VSP]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[P1:%.*]] = getelementptr i32, ptr addrspace(7) [[PTR]], i32 1 +; CHECK-NEXT: [[CR:%.*]] = load i32, ptr addrspace(7) [[P1]], align 4 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr addrspace(7) [[PTR]], i32 2 +; CHECK-NEXT: [[ARG:%.*]] = load i32, ptr addrspace(7) [[P2]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } poison, i32 [[CR]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP12]], i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP13]], i32 poison, 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } 
[[TMP14]], i32 poison, 3 +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP15]], i32 [[ARG]], 4 +; CHECK-NEXT: [[TMP56:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32 } [[TMP16]], i32 [[TMP11]], 5 +; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32 } [[TMP56]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP17]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP22]]) +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP23]], i1 true) +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP17]], i32 [[TMP20]]) +; CHECK-NEXT: [[TMP58:%.*]] = icmp eq i32 [[TMP17]], [[TMP21]] +; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP58]]) +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP21]], -64 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP24]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i32> [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP21]], 56 +; CHECK-NEXT: [[TMP29:%.*]] = shl i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP29]], 16 +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP30]], i32 42) +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i64 [[TMP32]] to <2 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x i32> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP33]], i64 1 +; CHECK-NEXT: [[TMP36:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <20 x i32> [[TMP36]], i32 [[TMP34]], i64 1 +; CHECK-NEXT: [[TMP38:%.*]] = insertelement <20 x i32> [[TMP37]], i32 [[TMP35]], i64 2 +; CHECK-NEXT: [[TMP39:%.*]] = insertelement <20 x i32> [[TMP38]], i32 [[USERDATA0]], i64 3 +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <20 x i32> [[TMP39]], 
i32 [[USERDATA1]], i64 4 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <20 x i32> [[TMP40]], i32 [[USERDATA2]], i64 5 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <20 x i32> [[TMP41]], i32 [[USERDATA3]], i64 6 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <20 x i32> [[TMP42]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <20 x i32> [[TMP43]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <20 x i32> [[TMP44]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <20 x i32> [[TMP45]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP47:%.*]] = insertelement <20 x i32> [[TMP46]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> [[TMP47]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 42, i64 16 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[TMP24]], i64 17 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP57]], i64 18 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP31]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32s(ptr inreg [[TMP27]], i32 inreg [[TMP57]], <20 x i32> inreg [[TMP55]], { i32, i32, i32, i32, i32, i32 } [[TMP56]], i32 1, i32 [[TMP31]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +.entry: + %csp = alloca i32, align 4 + %desc = call <4 x i32> @lgc.load.user.data__v4i32(i32 0) + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %p0 = getelementptr i32, ptr addrspace(7) %ptr, i32 0 + %i_vsp = load i32, ptr addrspace(7) %p0, align 4 + store i32 %i_vsp, ptr %csp, align 4 + %p1 = getelementptr i32, ptr addrspace(7) %ptr, i32 1 + %cr = load i32, ptr addrspace(7) %p1, align 4 + %p2 = getelementptr i32, ptr addrspace(7) %ptr, i32 2 + %arg = load i32, ptr addrspace(7) %p2, align 4 + %0 = load i32, ptr %csp, align 4 + call void (...) @lgc.cps.jump(i32 %cr, i32 1, i32 %0, i32 poison, i32 poison, i32 %arg, i32 %0) + unreachable +} + +declare <4 x i32> @lgc.load.user.data__v4i32(i32) #3 + +declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>, i1) #4 + +attributes #0 = { nounwind } +attributes #1 = { nounwind willreturn memory(inaccessiblemem: write) } +attributes #2 = { nounwind willreturn memory(inaccessiblemem: read) } +attributes #3 = { nounwind memory(none) } +attributes #4 = { nounwind willreturn memory(none) } + +!lgc.user.data.nodes = !{!0} +!llpc.compute.mode = !{!1} +!continuation.stackAddrspace = !{!2} + +!0 = !{!"DescriptorBuffer", i32 6, i32 6, i32 0, i32 4, i64 0, i32 0, i32 4} +!1 = !{i32 8, i32 4, i32 1, i32 0, i32 0, i32 1} +!2 = !{i32 5} +!3 = !{i32 7} +!4 = !{i32 42} +;. +; CHECK: [[META6]] = !{i32 7} +; CHECK: [[META7]] = !{i32 42} +;. 
diff --git a/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-stack-lowering.lgc b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-stack-lowering.lgc new file mode 100644 index 0000000000..6e1db6e4c0 --- /dev/null +++ b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-stack-lowering.lgc @@ -0,0 +1,708 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 +; RUN: lgc -mcpu=gfx1201 -o - -lgc-use-init-whole-wave -passes="require<lgc-pipeline-state>,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 + +declare void @lgc.cps.jump(...) #0 + +declare ptr addrspace(32) @lgc.cps.alloc(i32) + +declare void @lgc.cps.free(i32) + +declare i32 @lgc.cps.as.continuation.reference(ptr) + +declare ptr addrspace(32) @lgc.cps.peek(i32) + +declare ptr addrspace(32) @lgc.cps.get.vsp() + +declare i32 @lgc.cps.get.dummy.index(i32) + +declare void @lgc.cps.complete() + +define void @test.0(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test.0( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] !continuation.state [[META8:![0-9]+]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]]
= bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP12:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP7]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP9]], i32 0 +; CHECK-NEXT: store i32 333, ptr addrspace(5) [[TMP13]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP7]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP11]], i32 0 +; CHECK-NEXT: store i32 111, ptr addrspace(5) [[TMP44]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP7]], 9 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP15]], i32 0 +; CHECK-NEXT: store i8 99, ptr addrspace(5) [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.reloc.constant(metadata [[META9:![0-9]+]]) +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ [[TMP18]], [[TMP12]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ [[TMP19]], [[TMP12]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ALLOCASPILLBB]] ], [ poison, [[TMP12]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP12]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ poison, 
[[ALLOCASPILLBB]] ], [ [[TMP14]], [[TMP12]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP10]], [[TMP12]] ] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP25]] to <2 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP81:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP48]], i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP82:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP81]], i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP83:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP82]], i32 [[TMP22]], 3 +; CHECK-NEXT: [[TMP84:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP83]], i32 [[TMP23]], 4 +; CHECK-NEXT: [[TMP85:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP84]], i32 [[TMP27]], 5 +; CHECK-NEXT: [[TMP86:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP85]], i32 [[TMP24]], 6 +; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP86]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP26]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP29]]) +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP28]], 3 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP28]], 2 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) 
+; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP42]], i1 true) +; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP26]], [[TMP47]] +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP47]], -64 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP49]], i64 0 +; CHECK-NEXT: [[TMP51:%.*]] = bitcast <2 x i32> [[TMP50]] to i64 +; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP47]], 56 +; CHECK-NEXT: [[TMP54:%.*]] = shl i32 [[TMP53]], 1 +; CHECK-NEXT: [[TMP55:%.*]] = add i32 [[TMP54]], 16 +; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP55]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = bitcast i64 [[TMP57]] to <2 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <2 x i32> [[TMP58]], i64 0 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x i32> [[TMP58]], i64 1 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[TMP59]], i64 1 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[TMP60]], i64 2 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: 
[[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[TMP49]], i64 17 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[TMP46]], i64 18 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[TMP56]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32i32s(ptr inreg [[TMP52]], i32 inreg [[TMP46]], <20 x i32> inreg [[TMP80]], { i32, i32, i32, i32, i32, i32, i32 } [[TMP86]], i32 1, i32 [[TMP56]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %0 = load i32, ptr %csp, align 4 + %1 = add i32 %0, 12 + store i32 %1, ptr %csp, align 4 + %2 = inttoptr i32 %0 to ptr addrspace(5) + %3 = getelementptr i8, ptr addrspace(5) %2, i32 0 + store i32 333, ptr addrspace(5) %3, align 4 + %4 = add i32 %0, 4 + %5 = inttoptr i32 %4 to ptr addrspace(5) + %6 = getelementptr i8, ptr addrspace(5) %5, i32 0 + store i32 111, ptr addrspace(5) %6, align 4 + %7 = add i32 %0, 9 + %8 = inttoptr i32 %7 to ptr addrspace(5) + %9 = getelementptr i8, ptr addrspace(5) %8, i32 0 + store i8 99, ptr addrspace(5) %9, align 1 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + %10 = load i32, ptr %csp, align 4 + call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, i32 %10, i32 poison, i32 poison, i32 %7, i32 %4) + unreachable +} + +define void @test.1(i32 %cspInit, i32 %p2, i32 %q1) !lgc.cps !1 !lgc.shaderstage !2 !continuation !5 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test.1( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META10:![0-9]+]] !continuation.state [[META8]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[ALLOCASPILLBB:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i32 [[Q1]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP7]], i32 0 +; CHECK-NEXT: [[N111:%.*]] = load i32, ptr addrspace(5) [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i32 [[P2]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP9]], i32 0 
+; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP10]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.reloc.constant(metadata [[META11:![0-9]+]]) +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP36:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ [[TMP12]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY_BLOCK]] ], [ [[TMP13]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[P2]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[Q1]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = insertvalue { i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP74:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP73]], i32 [[TMP36]], 1 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP74]], i32 [[TMP40]], 2 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP17]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { i32, i32, i32, i32, i32 } [[TMP77]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP22]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP20]], 2 +; CHECK-NEXT: 
[[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP26]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP34]], i1 true) +; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP18]], i32 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP18]], [[TMP39]] +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP39]], -64 +; CHECK-NEXT: [[TMP42:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP41]], i64 0 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <2 x i32> [[TMP42]] to i64 +; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP39]], 56 +; CHECK-NEXT: [[TMP46:%.*]] = shl i32 [[TMP45]], 1 +; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP46]], 16 +; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP47]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i64 [[TMP49]] to <2 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[TMP50]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> [[TMP50]], i64 1 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP51]], i64 1 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP52]], i64 2 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP57:%.*]] = 
insertelement <20 x i32> [[TMP56]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP41]], i64 17 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP38]], i64 18 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP48]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32s(ptr inreg [[TMP44]], i32 inreg [[TMP38]], <20 x i32> inreg [[TMP72]], { i32, i32, i32, i32, i32 } [[TMP77]], i32 1, i32 [[TMP48]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %0 = inttoptr i32 %q1 to ptr addrspace(5) + %1 = getelementptr i8, ptr addrspace(5) %0, i32 0 + %n111 = load i32, ptr addrspace(5) %1, align 4 + %2 = inttoptr i32 %p2 to ptr addrspace(5) + %3 = getelementptr i8, ptr addrspace(5) %2, i32 0 + %n99 = load i8, ptr addrspace(5) %3, align 1 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.2) + %4 = load i32, ptr %csp, align 4 + call void (...) @lgc.cps.jump(i32 %cr, i32 2, i32 %4, i32 poison, i32 poison) + unreachable +} + +define void @test.2(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !6 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test.2( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META12:![0-9]+]] !continuation.state [[META8]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[ALLOCASPILLBB:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -12 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP9]], i32 0 +; CHECK-NEXT: [[N333:%.*]] = load i32, ptr addrspace(5) [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -12 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP34:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ 0, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP46:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP14]] to <2 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = insertvalue { i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP40]], i32 [[TMP34]], 1 +; CHECK-NEXT: [[TMP42:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP41]], i32 [[TMP38]], 2 +; CHECK-NEXT: [[TMP43:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP42]], i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP44:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP43]], i32 [[TMP46]], 4 +; 
CHECK-NEXT: [[TMP16:%.*]] = extractvalue { i32, i32, i32, i32, i32 } [[TMP44]], 1 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP18]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP20]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP18]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP18]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP29]]) +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP28]] +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP32]], i1 true) +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP16]], i32 [[TMP33]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP16]], [[TMP37]] +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[TMP39]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK: chain.block: +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP37]], -64 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP45]], i64 0 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i32> [[TMP77]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP37]], 56 +; CHECK-NEXT: [[TMP50:%.*]] = shl i32 [[TMP49]], 1 +; CHECK-NEXT: [[TMP51:%.*]] = add i32 [[TMP50]], 16 +; CHECK-NEXT: 
[[TMP52:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP51]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP54:%.*]] = bitcast i64 [[TMP53]] to <2 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i32> [[TMP54]], i64 0 +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[TMP54]], i64 1 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP55]], i64 1 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[TMP56]], i64 2 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP45]], i64 17 +; 
CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP36]], i64 18 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP52]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32s(ptr inreg [[TMP48]], i32 inreg [[TMP36]], <20 x i32> inreg [[TMP76]], { i32, i32, i32, i32, i32 } [[TMP44]], i32 1, i32 [[TMP52]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; CHECK: ret.block: +; CHECK-NEXT: ret void +; +AllocaSpillBB: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %0 = load i32, ptr %csp, align 4 + %1 = add i32 %0, -12 + %2 = inttoptr i32 %1 to ptr addrspace(5) + %3 = getelementptr i8, ptr addrspace(5) %2, i32 0 + %n333 = load i32, ptr addrspace(5) %3, align 4 + %4 = load i32, ptr %csp, align 4 + %5 = add i32 %4, -12 + store i32 %5, ptr %csp, align 4 + ret void +} + +define void @test.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !7 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test.gep( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META13:![0-9]+]] !continuation.state [[META8]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[ALLOCASPILLBB:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[STACK_EL0:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 0) +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[STACK_EL0]], 24 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP12]], i32 0 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[TMP13]], align 4 +; CHECK-NEXT: [[STACK_EL1:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 1) +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[STACK_EL1]], 24 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP7]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], -4 +; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP15]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP18]], i32 0 +; CHECK-NEXT: store i32 [[TMP17]], ptr addrspace(5) [[TMP19]], align 4 +; CHECK-NEXT: [[STACK_EL2:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 2) +; CHECK-NEXT: [[STACK_EL2_DIV:%.*]] = sdiv i32 [[STACK_EL2]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP7]], 8 +; CHECK-NEXT: [[TMP21:%.*]] = mul i32 [[STACK_EL2_DIV]], 24 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] 
+; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], -8 +; CHECK-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP25]], i32 0 +; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(5) [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.reloc.constant(metadata [[META9]]) +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP54:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ [[TMP28]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP58:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY_BLOCK]] ], [ [[TMP29]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ poison, [[ENTRY_BLOCK]] ], [ [[TMP24]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ poison, [[ENTRY_BLOCK]] ], [ [[TMP24]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP34]] to <2 x i32> +; CHECK-NEXT: [[TMP91:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP92:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP91]], i32 [[TMP54]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP92]], i32 [[TMP58]], 2 +; CHECK-NEXT: [[TMP94:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP93]], i32 [[TMP32]], 3 +; CHECK-NEXT: [[TMP95:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP94]], i32 [[TMP33]], 4 +; CHECK-NEXT: [[TMP96:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, 
i32 } [[TMP95]], i32 [[TMP35]], 5 +; CHECK-NEXT: [[TMP97:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP96]], i32 [[TMP37]], 6 +; CHECK-NEXT: [[TMP36:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP97]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP36]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP38]], 3 +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP38]], 2 +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i32 [[TMP38]], 1 +; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +; CHECK-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 [[TMP48]] +; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP52]], i1 true) +; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP36]], i32 [[TMP53]]) +; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i32 [[TMP36]], [[TMP57]] +; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP55]]) +; CHECK-NEXT: [[TMP59:%.*]] = and i32 [[TMP57]], -64 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP59]], i64 0 +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <2 x i32> [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr +; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP57]], 56 +; CHECK-NEXT: [[TMP64:%.*]] = shl i32 [[TMP63]], 1 +; CHECK-NEXT: [[TMP65:%.*]] = add i32 [[TMP64]], 16 +; CHECK-NEXT: 
[[TMP66:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP65]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast i64 [[TMP67]] to <2 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <2 x i32> [[TMP68]], i64 0 +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <2 x i32> [[TMP68]], i64 1 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP69]], i64 1 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP70]], i64 2 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP82:%.*]] = insertelement <20 x i32> [[TMP81]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP83:%.*]] = insertelement <20 x i32> [[TMP82]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <20 x i32> [[TMP83]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP85:%.*]] = insertelement <20 x i32> [[TMP84]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP86:%.*]] = insertelement <20 x i32> [[TMP85]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP87:%.*]] = insertelement <20 x i32> [[TMP86]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP88:%.*]] = insertelement <20 x i32> [[TMP87]], i32 [[TMP59]], i64 17 +; 
CHECK-NEXT: [[TMP89:%.*]] = insertelement <20 x i32> [[TMP88]], i32 [[TMP56]], i64 18 +; CHECK-NEXT: [[TMP90:%.*]] = insertelement <20 x i32> [[TMP89]], i32 [[TMP66]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32i32s(ptr inreg [[TMP62]], i32 inreg [[TMP56]], <20 x i32> inreg [[TMP90]], { i32, i32, i32, i32, i32, i32, i32 } [[TMP97]], i32 1, i32 [[TMP66]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %0 = load i32, ptr %csp, align 4 + %1 = add i32 %0, 12 + store i32 %1, ptr %csp, align 4 + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %2 = mul i32 %stack.el0, 24 + %3 = add i32 %0, %2 + %4 = load i32, ptr %csp, align 4 + %5 = inttoptr i32 %3 to ptr addrspace(5) + %6 = getelementptr i8, ptr addrspace(5) %5, i32 0 + store i32 %4, ptr addrspace(5) %6, align 4 + %stack.el1 = call i32 @lgc.cps.get.dummy.index(i32 1) + %7 = mul i32 %stack.el1, 24 + %8 = add i32 %0, %7 + %9 = load i32, ptr %csp, align 4 + %10 = add i32 %9, -4 + %11 = inttoptr i32 %8 to ptr addrspace(5) + %12 = getelementptr i8, ptr addrspace(5) %11, i32 0 + store i32 %10, ptr addrspace(5) %12, align 4 + %stack.el2 = call i32 @lgc.cps.get.dummy.index(i32 2) + %stack.el2.div = sdiv i32 %stack.el2, 2 + %13 = add i32 %0, 8 + %14 = mul i32 %stack.el2.div, 24 + %15 = add i32 %13, %14 + %16 = load i32, ptr %csp, align 4 + %17 = add i32 %16, -8 + %18 = inttoptr i32 %15 to ptr addrspace(5) + %19 = getelementptr i8, ptr addrspace(5) %18, i32 0 + store i32 %17, ptr addrspace(5) %19, align 4 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + %20 = load i32, ptr %csp, align 4 + call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, i32 %20, i32 poison, i32 poison, i32 %17, i32 %17) + unreachable +} + +define void @test.nested.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !8 !continuation.state !4 { +; CHECK-LABEL: define amdgpu_cs_chain void @test.nested.gep( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META14:![0-9]+]] !continuation.state [[META8]] { +; CHECK-NEXT: AllocaSpillBB: +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[ALLOCASPILLBB:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: +; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[STACK_EL0:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 0) +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[STACK_EL0]], 24 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP7]], 
[[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP11]] to ptr addrspace(5) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP13]], i32 0 +; CHECK-NEXT: store i32 [[TMP12]], ptr addrspace(5) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.reloc.constant(metadata [[META9]]) +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ [[TMP16]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP46:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY_BLOCK]] ], [ [[TMP17]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY_BLOCK]] ], [ poison, [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[ENTRY_BLOCK]] ], [ [[TMP12]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ poison, [[ENTRY_BLOCK]] ], [ [[TMP12]], [[ALLOCASPILLBB]] ] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP22]] to <2 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP80:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 [[TMP42]], 1 +; CHECK-NEXT: [[TMP81:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], i32 [[TMP46]], 2 +; CHECK-NEXT: [[TMP82:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP81]], i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP83:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP82]], i32 [[TMP21]], 4 +; CHECK-NEXT: [[TMP84:%.*]] = insertvalue { i32, i32, i32, i32, 
i32, i32, i32 } [[TMP83]], i32 [[TMP23]], 5 +; CHECK-NEXT: [[TMP85:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP84]], i32 [[TMP25]], 6 +; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32, i32 } [[TMP85]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP24]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP26]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP29]]) +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 [[TMP28]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP26]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP33]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP26]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP40]], i1 true) +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP24]], i32 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP24]], [[TMP45]] +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP45]], -64 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP47]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = bitcast <2 x i32> [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP45]], 56 +; CHECK-NEXT: [[TMP52:%.*]] = shl i32 [[TMP51]], 1 +; CHECK-NEXT: [[TMP53:%.*]] = add i32 [[TMP52]], 16 +; 
CHECK-NEXT: [[TMP54:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP53]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i64 [[TMP55]] to <2 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <2 x i32> [[TMP56]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x i32> [[TMP56]], i64 1 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[TMP57]], i64 1 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[TMP58]], i64 2 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP47]], i64 17 
+; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[TMP44]], i64 18 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[TMP54]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32i32s(ptr inreg [[TMP50]], i32 inreg [[TMP44]], <20 x i32> inreg [[TMP78]], { i32, i32, i32, i32, i32, i32, i32 } [[TMP85]], i32 1, i32 [[TMP54]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %csp = alloca i32, align 4 + store i32 %cspInit, ptr %csp, align 4 + %0 = load i32, ptr %csp, align 4 + %1 = add i32 %0, 12 + store i32 %1, ptr %csp, align 4 + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %2 = mul i32 %stack.el0, 24 + %3 = add i32 %0, %2 + %4 = add i32 %3, 16 + %5 = load i32, ptr %csp, align 4 + %6 = inttoptr i32 %4 to ptr addrspace(5) + %7 = getelementptr i8, ptr addrspace(5) %6, i32 0 + store i32 %5, ptr addrspace(5) %7, align 4 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + %8 = load i32, ptr %csp, align 4 + call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, i32 %8, i32 poison, i32 poison, i32 %5, i32 %5) + unreachable +} + +declare !continuation !3 { ptr, ptr } @continuation.prototype.test.0(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #1 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #1 + +declare !continuation !5 { ptr, ptr } @continuation.prototype.test.1(ptr, i1) + +declare !continuation !6 { ptr, ptr } @continuation.prototype.test.2(ptr, i1) + +declare !continuation !7 { ptr, ptr } @continuation.prototype.test.gep(ptr, i1) + +declare !continuation !8 { ptr, ptr } @continuation.prototype.test.nested.gep(ptr, i1) + +attributes #0 = { noreturn } +attributes #1 = { nounwind } + +!continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!9} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{ptr @test.0} +!4 = !{i32 0} +!5 = !{ptr @test.1} +!6 = !{ptr @test.2} +!7 = !{ptr @test.gep} +!8 = !{ptr @test.nested.gep} +!9 = !{i32 3} +;. +; CHECK: [[META5]] = !{i32 1} +; CHECK: [[META6]] = !{i32 7} +; CHECK: [[META7]] = !{ptr @test.0} +; CHECK: [[META8]] = !{i32 0} +; CHECK: [[META9]] = !{!"_dvgpr$test.1"} +; CHECK: [[META10]] = !{ptr @test.1} +; CHECK: [[META11]] = !{!"_dvgpr$test.2"} +; CHECK: [[META12]] = !{ptr @test.2} +; CHECK: [[META13]] = !{ptr @test.gep} +; CHECK: [[META14]] = !{ptr @test.nested.gep} +;. diff --git a/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-unify-exits.lgc b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-unify-exits.lgc new file mode 100644 index 0000000000..dfa9d4df4f --- /dev/null +++ b/lgc/test/Transforms/CpsLoweringWithDvgpr/cps-unify-exits.lgc @@ -0,0 +1,388 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 +; RUN: lgc -mcpu=gfx1201 -o - -lgc-use-init-whole-wave -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 + +declare void @lgc.cps.jump(...) 
#0 + +define void @unify_jumps(i32 %arg, ptr %table) !lgc.cps !1 !lgc.shaderstage !2 { +; CHECK-LABEL: define amdgpu_cs_chain void @unify_jumps( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 [[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 [[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]], i32 [[INACTIVE_VGPR28:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META4:![0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP38]] to ptr addrspace(4) +; CHECK-NEXT: 
[[TMP76:%.*]] = ptrtoint ptr [[TABLE]] to i64 +; CHECK-NEXT: [[TMP77:%.*]] = bitcast i64 [[TMP76]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP77]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP77]], i64 1 +; CHECK-NEXT: [[TMP78:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP78]], label [[TMP79:%.*]], label [[ELSE:%.*]] +; CHECK: 10: +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[THEN1:%.*]], label [[ELSE1:%.*]] +; CHECK: then: +; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 +; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 +; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 +; CHECK-NEXT: br label [[ELSE]] +; CHECK: else: +; CHECK-NEXT: [[TABLE_1:%.*]] = getelementptr i32, ptr [[TABLE]], i32 1 +; CHECK-NEXT: [[CR_ELSE:%.*]] = load i32, ptr [[TABLE_1]], align 4 +; CHECK-NEXT: [[ELSE_ARG:%.*]] = uitofp i32 [[ARG]] to float +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float [[ELSE_ARG]] to i32 +; CHECK-NEXT: br label [[ELSE]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ [[CR_ELSE]], [[ELSE1]] ], [ [[CR_THEN]], [[THEN1]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[ARG]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP7]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP8]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP81:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY_BLOCK]] ], [ [[TMP10]], [[ELSE1]] ], [ [[THEN_ARG]], [[THEN1]] ] +; CHECK-NEXT: [[TMP82:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY_BLOCK]] ], [ 5, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP19:%.*]] 
= phi i32 [ [[INACTIVE_VGPR3]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP83:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP84:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP85:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP86:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP87:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP88:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP89:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP90:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP91:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP92:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP93:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP94:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP95:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP96:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: 
[[TMP98:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP99:%.*]] = phi i32 [ [[INACTIVE_VGPR20]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP100:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP101:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP102:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP103:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP104:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP105:%.*]] = phi i32 [ [[INACTIVE_VGPR28]], [[ENTRY_BLOCK]] ], [ poison, [[ELSE1]] ], [ poison, [[THEN1]] ] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP106:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP107:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP106]], i32 [[TMP17]], 1 +; CHECK-NEXT: [[TMP108:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP107]], i32 [[TMP12]], 2 +; CHECK-NEXT: [[TMP109:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP108]], i32 [[TMP13]], 3 +; CHECK-NEXT: [[TMP110:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP109]], i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP111:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP110]], i32 [[TMP81]], 5 +; CHECK-NEXT: [[TMP112:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP111]], i32 [[TMP82]], 6 +; CHECK-NEXT: [[TMP113:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP112]], i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP114:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP113]], i32 [[TMP19]], 8 +; CHECK-NEXT: [[TMP115:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP114]], i32 [[TMP83]], 9 +; CHECK-NEXT: [[TMP116:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP115]], i32 [[TMP84]], 10 +; CHECK-NEXT: [[TMP117:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP116]], i32 [[TMP85]], 11 +; CHECK-NEXT: [[TMP118:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP117]], i32 [[TMP86]], 12 +; CHECK-NEXT: [[TMP119:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP118]], i32 [[TMP24]], 13 +; CHECK-NEXT: [[TMP120:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP119]], i32 [[TMP87]], 14 +; CHECK-NEXT: [[TMP121:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP120]], i32 [[TMP88]], 15 +; CHECK-NEXT: [[TMP122:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP121]], i32 [[TMP89]], 16 +; CHECK-NEXT: [[TMP123:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP122]], i32 [[TMP90]], 17 +; CHECK-NEXT: [[TMP124:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP123]], i32 [[TMP91]], 18 +; CHECK-NEXT: [[TMP125:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP124]], i32 [[TMP92]], 19 +; CHECK-NEXT: [[TMP126:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP125]], i32 [[TMP93]], 20 +; CHECK-NEXT: [[TMP127:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP126]], i32 [[TMP94]], 21 +; CHECK-NEXT: [[TMP128:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP127]], i32 [[TMP95]], 22 +; CHECK-NEXT: [[TMP129:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP128]], i32 [[TMP96]], 23 +; CHECK-NEXT: [[TMP130:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP129]], i32 [[TMP98]], 24 +; CHECK-NEXT: [[TMP131:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP130]], i32 [[TMP99]], 25 +; CHECK-NEXT: [[TMP132:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32 } [[TMP131]], i32 [[TMP100]], 26 +; CHECK-NEXT: [[TMP133:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP132]], i32 [[TMP101]], 27 +; CHECK-NEXT: [[TMP134:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP133]], i32 [[TMP102]], 28 +; CHECK-NEXT: [[TMP135:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP134]], i32 [[TMP103]], 29 +; CHECK-NEXT: [[TMP136:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP135]], i32 [[TMP41]], 30 +; CHECK-NEXT: [[TMP137:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP136]], i32 [[TMP104]], 31 +; CHECK-NEXT: [[TMP138:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP137]], i32 [[TMP43]], 32 +; CHECK-NEXT: [[TMP80:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP138]], i32 [[TMP105]], 33 +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32 } [[TMP80]], 1 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP27]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 [[TMP20]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP27]], 2 +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 [[TMP29]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP27]], 1 +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 [[TMP35]], i32 [[TMP33]] +; CHECK-NEXT: [[TMP97:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP37]], i1 true) +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP25]], i32 [[TMP97]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP25]], [[TMP42]] +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP42]], -64 +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP44]], i64 0 +; CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i32> [[TMP45]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr +; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP42]], 56 +; CHECK-NEXT: [[TMP49:%.*]] = shl i32 [[TMP48]], 1 +; CHECK-NEXT: [[TMP50:%.*]] = add i32 [[TMP49]], 16 +; CHECK-NEXT: [[TMP51:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP50]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = bitcast i64 
[[TMP52]] to <2 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <2 x i32> [[TMP53]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i32> [[TMP53]], i64 1 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP54]], i64 1 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP55]], i64 2 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP44]], i64 17 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP40]], i64 18 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP51]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, 
{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP47]], i32 inreg [[TMP40]], <20 x i32> inreg [[TMP75]], { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], i32 1, i32 [[TMP51]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; +entry: + %cond = icmp ult i32 %arg, 3 + br i1 %cond, label %then, label %else + +then: ; preds = %entry + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, i32 poison, i32 poison, i32 poison, i32 %then.arg) + unreachable + +else: ; preds = %entry + %table.1 = getelementptr i32, ptr %table, i32 1 + %cr.else = load i32, ptr %table.1, align 4 + %else.arg = uitofp i32 %arg to float + call void (...) 
@lgc.cps.jump(i32 %cr.else, i32 2, i32 poison, i32 poison, i32 poison, float %else.arg, i32 5) + unreachable +} + +define void @unify_jump_ret(i32 %arg, ptr %table) !lgc.cps !1 !lgc.shaderstage !2 { +; CHECK-LABEL: define amdgpu_cs_chain void @unify_jump_ret( +; CHECK-SAME: i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], i32 [[LOCALINVOCATIONID:%.*]], i32 inreg [[MAXOUTGOINGVGPRCOUNT:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 [[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 [[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]], i32 [[INACTIVE_VGPR28:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TABLE]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP19]], label [[TMP74:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 10: +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 +; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 +; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: else: +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[VCR]], [[ENTRY_BLOCK:%.*]] ], [ [[CR_THEN]], [[THEN]] ], [ 0, [[ELSE]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[ARG]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP36]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP40]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY_BLOCK]] ], [ [[THEN_ARG]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP80:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP81:%.*]] = phi i32 [ [[INACTIVE_VGPR3]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP82:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY_BLOCK]] ], [ poison, 
[[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP83:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP84:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP85:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP86:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP87:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP88:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP89:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP90:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP91:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP92:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP93:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP94:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP95:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP96:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP97:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP98:%.*]] = phi i32 [ [[INACTIVE_VGPR20]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ 
poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP99:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP100:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP101:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP102:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP103:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP104:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP105:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP106:%.*]] = phi i32 [ [[INACTIVE_VGPR28]], [[ENTRY_BLOCK]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, i32 [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP108:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP107]], i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP109:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP108]], i32 [[TMP9]], 2 +; CHECK-NEXT: [[TMP110:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP109]], i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP111:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP110]], i32 [[TMP11]], 4 +; CHECK-NEXT: [[TMP112:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP111]], i32 [[TMP15]], 5 +; CHECK-NEXT: [[TMP113:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP112]], i32 [[TMP16]], 6 +; CHECK-NEXT: [[TMP114:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP113]], i32 [[TMP80]], 7 +; CHECK-NEXT: [[TMP115:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP114]], i32 [[TMP81]], 8 +; CHECK-NEXT: [[TMP116:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP115]], i32 [[TMP82]], 9 +; CHECK-NEXT: [[TMP117:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP116]], i32 [[TMP83]], 10 +; CHECK-NEXT: [[TMP118:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP117]], i32 [[TMP84]], 11 +; CHECK-NEXT: [[TMP119:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP118]], i32 [[TMP85]], 12 +; CHECK-NEXT: [[TMP120:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP119]], i32 [[TMP86]], 13 +; CHECK-NEXT: [[TMP121:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP120]], i32 [[TMP87]], 14 +; CHECK-NEXT: [[TMP122:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP121]], i32 [[TMP88]], 15 +; CHECK-NEXT: [[TMP123:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP122]], i32 [[TMP89]], 16 +; CHECK-NEXT: [[TMP124:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP123]], i32 [[TMP90]], 17 +; CHECK-NEXT: [[TMP125:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP124]], i32 [[TMP91]], 18 +; CHECK-NEXT: [[TMP126:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP125]], i32 [[TMP92]], 19 +; CHECK-NEXT: [[TMP127:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP126]], i32 [[TMP93]], 20 +; CHECK-NEXT: [[TMP128:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP127]], i32 [[TMP94]], 21 +; CHECK-NEXT: [[TMP129:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP128]], i32 [[TMP95]], 22 +; CHECK-NEXT: [[TMP130:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP129]], i32 [[TMP96]], 23 +; CHECK-NEXT: [[TMP131:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP130]], i32 [[TMP97]], 24 +; CHECK-NEXT: [[TMP132:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP131]], i32 [[TMP98]], 25 +; CHECK-NEXT: [[TMP133:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP132]], i32 [[TMP99]], 26 +; CHECK-NEXT: [[TMP134:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP133]], i32 [[TMP100]], 27 +; CHECK-NEXT: [[TMP135:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP134]], i32 [[TMP101]], 28 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP135]], i32 [[TMP102]], 29 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP103]], 30 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP104]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP77]], i32 [[TMP105]], 32 +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP78]], i32 [[TMP106]], 33 +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = 
icmp eq i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP22]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP20]], 2 +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP26]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP20]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP34]], i1 true) +; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP18]], i32 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP18]], [[TMP39]] +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP39]], 0 +; CHECK-NEXT: br i1 [[TMP41]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK: chain.block: +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], -64 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i32> [[TMP43]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP44]] to ptr +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP39]], 56 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP46]], 1 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 16 +; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP48]], i32 [[MAXOUTGOINGVGPRCOUNT]]) +; CHECK-NEXT: [[TMP50:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP51:%.*]] = bitcast i64 [[TMP50]] to <2 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> 
[[TMP51]], i64 0 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[TMP51]], i64 1 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP52]], i64 1 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP53]], i64 2 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[MAXOUTGOINGVGPRCOUNT]], i64 16 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP42]], i64 17 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP38]], i64 18 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP49]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP45]], i32 inreg [[TMP38]], <20 x i32> inreg [[TMP73]], { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 1, i32 [[TMP49]], i32 -1, ptr @retry_vgpr_alloc.v20i32) +; CHECK-NEXT: unreachable +; CHECK: ret.block: +; CHECK-NEXT: ret void +; +entry: + %cond = icmp ult i32 %arg, 3 + br i1 %cond, label %then, label %else + +then: ; preds = %entry + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, i32 poison, i32 poison, i32 poison, i32 %then.arg) + unreachable + +else: ; preds = %entry + ret void +} + +attributes #0 = { noreturn } + +!continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!3} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{i32 32} +;. +; CHECK: [[META4]] = !{i32 1} +; CHECK: [[META5]] = !{i32 7} diff --git a/lgc/test/Transforms/CpsLoweringWithDvgpr/lit.local.cfg b/lgc/test/Transforms/CpsLoweringWithDvgpr/lit.local.cfg new file mode 100644 index 0000000000..2c5115503d --- /dev/null +++ b/lgc/test/Transforms/CpsLoweringWithDvgpr/lit.local.cfg @@ -0,0 +1,27 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +if "llpc_build_gfx12" not in config.available_features: + config.unsupported = True diff --git a/lgc/test/shaderdb/gfx12/CsBPermuteWave64.lgc b/lgc/test/shaderdb/gfx12/CsBPermuteWave64.lgc new file mode 100644 index 0000000000..fdb11064ae --- /dev/null +++ b/lgc/test/shaderdb/gfx12/CsBPermuteWave64.lgc @@ -0,0 +1,48 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc -o - --mcpu=gfx1201 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s + +define dllexport spir_func float @fn(float %value, i32 %index) !lgc.shaderstage !0 { +.entry: + %r = call float (...) @lgc.create.subgroup.shuffle.f32(float %value, i32 %index) + ret float %r +} + +declare float @lgc.create.subgroup.shuffle.f32(...) 
+ +; ShaderStage::Compute +!0 = !{i32 7} + +; Setting Threadgroup Dimensions to 64 x 1 x 1 +!llpc.compute.mode = !{!1} +!1 = !{i32 64, i32 1, i32 1} + +; CHECK-LABEL: amdgpu_cs_main: +; CHECK: v_lshlrev_b32_e32 v1, 2, v1 +; CHECK-NEXT: ds_bpermute_b32 v0, v1, v0 +; CHECK-NEXT: s_wait_dscnt 0x0 diff --git a/lgc/test/shaderdb/gfx12/CsClusteredMultiExclusive.lgc b/lgc/test/shaderdb/gfx12/CsClusteredMultiExclusive.lgc new file mode 100644 index 0000000000..50eb09520d --- /dev/null +++ b/lgc/test/shaderdb/gfx12/CsClusteredMultiExclusive.lgc @@ -0,0 +1,199 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc -o - --mcpu=gfx1201 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s +; REQUIRES: do-not-run-me +; ModuleID = 'lgcPipeline' +source_filename = "LLPC module" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn--amdpal" + +define dllexport void @"lgc.shader.CS.AD79A66CC9667A89:CSMain"() !lgc.shaderstage !10 { + %1 = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 27, i32 0, i32 poison, i32 poison) + %2 = extractelement <3 x i32> %1, i64 0 + %3 = lshr i32 %2, 2 + %4 = call <4 x i32> (...) @lgc.create.subgroup.partition.v4i32(i32 %3) + %5 = shufflevector <4 x i32> %4, <4 x i32> , <4 x i32> + %6 = call i32 (...) @lgc.create.subgroup.clustered.multi.exclusive.i32(i32 0, i32 %3, <4 x i32> %5) + %7 = call ptr addrspace(7) @lgc.load.buffer.desc(i64 4294967296, i32 0, i32 0, i32 267) + %8 = shl i32 %2, 2 + %9 = sext i32 %8 to i64 + %10 = getelementptr i8, ptr addrspace(7) %7, i64 %9 + store i32 %6, ptr addrspace(7) %10, align 4 + ret void +} + +; Function Attrs: nounwind willreturn memory(read) +declare <3 x i32> @lgc.create.read.builtin.input.v3i32(...) #0 + +; Function Attrs: convergent nounwind +declare <4 x i32> @lgc.create.subgroup.partition.v4i32(...) #1 + +; Function Attrs: convergent nounwind +declare i32 @lgc.create.subgroup.clustered.multi.exclusive.i32(...) 
#1 + +; Function Attrs: nounwind willreturn memory(read) +declare ptr addrspace(7) @lgc.load.buffer.desc(i64, i32, i32, i32) local_unnamed_addr #0 + +attributes #0 = { nounwind willreturn memory(read) } +attributes #1 = { convergent nounwind } + +!llpc.compute.mode = !{!0} +!lgc.client = !{!1} +!lgc.options = !{!2} +!lgc.options.CS = !{!3} +!lgc.user.data.nodes = !{!4, !5, !6, !7, !8, !9} + +!0 = !{i32 32, i32 1, i32 1} +!1 = !{!"DX12"} +!2 = !{i32 -1503530489, i32 546690824, i32 1491131267, i32 955349830, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 256, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 256} +!3 = !{i32 -1503530481, i32 546690824, i32 1491131267, i32 955349830, i32 0, i32 0, i32 0, i32 -1, i32 -1} +!4 = !{!"DescriptorTableVaPtr", i32 7, i32 255, i32 3, i32 1, i32 2} +!5 = !{!"DescriptorMutable", i32 17, i32 0, i32 0, i32 16, i64 0, i32 0, i32 8} +!6 = !{!"DescriptorMutable", i32 17, i32 0, i32 16, i32 8, i64 4294967296, i32 0, i32 8} +!7 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 0, i32 1, i32 2} +!8 = !{!"DescriptorConstBufferCompact", i32 15, i32 0, i32 0, i32 2, i64 10737355485, i32 1, i32 2} +!9 = !{!"DescriptorMutable", i32 17, i32 0, i32 2, i32 8, i64 6442388189, i32 0, i32 4} +!10 = !{i32 7} +; CHECK-LABEL: amdgpu_cs_main: +; CHECK: v_and_b32_e32 v1, 0x3ff, v0 +; CHECK-NEXT: v_bfe_u32 v3, v0, 2, 8 +; CHECK-NEXT: s_mov_b32 s0, s1 +; CHECK-NEXT: s_mov_b32 s1, 0 +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_readfirstlane_b32 s2, v3 +; CHECK-NEXT: v_cmp_eq_u32_e64 s3, s2, v3 +; CHECK-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: s_or_b32 s1, s3, s1 +; CHECK-NEXT: s_and_not1_b32 exec_lo, 
exec_lo, s1 +; CHECK-NEXT: s_cbranch_execnz .LBB0_1 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; CHECK-NEXT: v_mbcnt_lo_u32_b32 v3, -1, 0 +; CHECK-NEXT: s_getpc_b64 s[2:3] +; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_sext_i32_i16 s3, s3 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_mov_b32 s1, s3 +; CHECK-NEXT: v_bfe_u32 v4, v0, 0, v3 +; CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x40 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; CHECK-NEXT: v_clz_i32_u32_e32 v5, v4 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 +; CHECK-NEXT: v_and_b32_e32 v4, 28, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v5, 2, v5 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_add_nc_u32_e32 v4, -4, v4 +; CHECK-NEXT: v_xor_b32_e32 v5, 0x7c, v5 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) +; CHECK-NEXT: v_lshlrev_b32_e64 v4, v4, 15 +; CHECK-NEXT: ds_bpermute_b32 v2, v5, v2 +; CHECK-NEXT: v_add_nc_u32_e32 v5, -1, v3 +; CHECK-NEXT: v_and_b32_e32 v4, v4, v0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: s_bfe_u32 s4, s1, 0xe0010 +; CHECK-NEXT: s_and_b32 s1, s1, 0xc000ffff +; CHECK-NEXT: s_max_u32 s4, s4, 1 +; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1 +; CHECK-NEXT: s_mul_i32 s2, s4, s2 +; CHECK-NEXT: s_or_b32 s3, s3, 0x30000000 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_and_b32_e32 v5, v5, v0 +; CHECK-NEXT: v_ctz_i32_b32_e32 v7, v5 +; CHECK-NEXT: s_wait_dscnt 0x0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_permlane16_var_b32 v5, v2, v7 op_sel:[0,1] +; CHECK-NEXT: s_and_b32 vcc_lo, 0xaaaaaaaa, vcc_lo +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_dual_cndmask_b32 v5, 0, v5 :: v_dual_and_b32 v6, 30, v3 +; CHECK-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_add_nc_u32_e32 v6, -2, v6 +; CHECK-NEXT: v_add_nc_u32_e32 v2, v5, v2 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, 3 +; CHECK-NEXT: v_and_b32_e32 v6, v6, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_clz_i32_u32_e32 v7, v6 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 +; CHECK-NEXT: v_xor_b32_e32 v7, 31, v7 +; CHECK-NEXT: s_and_b32 vcc_lo, 0xcccccccc, vcc_lo +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_permlane16_var_b32 v6, v2, v7 op_sel:[0,1] +; CHECK-NEXT: v_clz_i32_u32_e32 v7, v4 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; CHECK-NEXT: v_xor_b32_e32 v7, 31, v7 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4 +; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v6 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, 0xf0f0f0f0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_permlane16_var_b32 v4, v2, v7 op_sel:[0,1] +; CHECK-NEXT: v_and_b32_e32 v5, 24, v3 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_dual_cndmask_b32 v4, 0, v4 :: v_dual_and_b32 v3, 16, v3 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_add_nc_u32_e32 v5, -8, v5 +; CHECK-NEXT: v_add_nc_u32_e32 v3, -16, v3 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; CHECK-NEXT: v_add_nc_u32_e32 v2, v4, v2 +; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 0xff +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_lshlrev_b32_e64 v3, v3, 0xffff +; CHECK-NEXT: v_and_b32_e32 v5, v5, v0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_and_b32_e32 v0, v3, v0 +; CHECK-NEXT: v_clz_i32_u32_e32 v6, v5 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v5 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; CHECK-NEXT: v_clz_i32_u32_e32 v4, v0 +; CHECK-NEXT: v_xor_b32_e32 v6, 31, v6 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, 0xff00ff00 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_xor_b32_e32 v4, 31, v4 +; CHECK-NEXT: v_permlane16_var_b32 v3, v2, v6 op_sel:[0,1] +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: v_add_nc_u32_e32 v2, v3, v2 +; CHECK-NEXT: s_and_b32 vcc_lo, vcc_lo, 0xffff0000 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_permlanex16_var_b32 v0, v2, v4 op_sel:[0,1] +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v2 +; CHECK-NEXT: buffer_store_b32 v0, v1, s[0:3], null offen +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; CHECK-NEXT: s_endpgm diff --git a/lgc/test/shaderdb/gfx12/buffer.atomic.ops.scope.lgc b/lgc/test/shaderdb/gfx12/buffer.atomic.ops.scope.lgc new file mode 100644 index 0000000000..b25a4a947b --- /dev/null +++ b/lgc/test/shaderdb/gfx12/buffer.atomic.ops.scope.lgc @@ -0,0 +1,87 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 +; RUN: lgc -mcpu=gfx1201 -print-after=lgc-lower-buffer-operations -o 2>&1 - < %s | FileCheck --check-prefixes=CHECK %s + +define amdgpu_gfx void @atomicrmw_scope_system(<4 x i32> inreg %desc) !lgc.shaderstage !0 { +; CHECK-LABEL: define amdgpu_gfx void @atomicrmw_scope_system( +; CHECK-SAME: <4 x i32> inreg [[DESC:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[PAD0:%.*]], i32 inreg noundef [[PAD1:%.*]], i32 inreg noundef [[PAD2:%.*]], i32 inreg noundef [[PAD3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META0:![0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32{{(.v4i32)?}}(i32 1, <4 x i32> [[DESC]], i32 0, i32 0, i32 24) +; CHECK-NEXT: ret void +; + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %add = atomicrmw add ptr addrspace(7) %ptr, i32 1 monotonic, align 4 + ret void +} + +define amdgpu_gfx void @atomicrmw_scope_singlethread(<4 x i32> inreg %desc) !lgc.shaderstage !0 { +; CHECK-LABEL: define amdgpu_gfx void @atomicrmw_scope_singlethread( +; CHECK-SAME: <4 x i32> inreg [[DESC:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[PAD0:%.*]], i32 inreg noundef [[PAD1:%.*]], i32 inreg noundef [[PAD2:%.*]], i32 inreg noundef [[PAD3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef 
[[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32{{(.v4i32)?}}(i32 1, <4 x i32> [[DESC]], i32 0, i32 0, i32 0) +; CHECK-NEXT: ret void +; + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %add = atomicrmw add ptr addrspace(7) %ptr, i32 1 syncscope("singlethread") monotonic, align 4 + ret void +} + +define amdgpu_gfx void @atomicrmw_scope_agent(<4 x i32> inreg %desc) !lgc.shaderstage !0 { +; CHECK-LABEL: define amdgpu_gfx void @atomicrmw_scope_agent( +; CHECK-SAME: <4 x i32> inreg [[DESC:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[PAD0:%.*]], i32 inreg noundef [[PAD1:%.*]], i32 inreg noundef [[PAD2:%.*]], i32 inreg noundef [[PAD3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32{{(.v4i32)?}}(i32 1, <4 x i32> [[DESC]], i32 0, i32 0, i32 16) +; CHECK-NEXT: ret void +; + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %add = atomicrmw add ptr addrspace(7) %ptr, i32 1 syncscope("agent") monotonic, align 4 + ret void +} + +define amdgpu_gfx void @atomicrmw_scope_workgroup(<4 x i32> inreg %desc) !lgc.shaderstage !0 { +; CHECK-LABEL: define amdgpu_gfx void 
@atomicrmw_scope_workgroup( +; CHECK-SAME: <4 x i32> inreg [[DESC:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[PAD0:%.*]], i32 inreg noundef [[PAD1:%.*]], i32 inreg noundef [[PAD2:%.*]], i32 inreg noundef [[PAD3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32{{(.v4i32)?}}(i32 1, <4 x i32> [[DESC]], i32 0, i32 0, i32 8) +; CHECK-NEXT: ret void +; + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %add = atomicrmw add ptr addrspace(7) %ptr, i32 1 syncscope("workgroup") monotonic, align 4 + ret void +} + +define amdgpu_gfx void @atomicrmw_scope_wavefront(<4 x i32> inreg %desc) !lgc.shaderstage !0 { +; CHECK-LABEL: define amdgpu_gfx void @atomicrmw_scope_wavefront( +; CHECK-SAME: <4 x i32> inreg [[DESC:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[PAD0:%.*]], i32 inreg noundef [[PAD1:%.*]], i32 inreg noundef [[PAD2:%.*]], i32 inreg noundef [[PAD3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32{{(.v4i32)?}}(i32 1, <4 x i32> [[DESC]], i32 0, i32 0, i32 0) +; CHECK-NEXT: ret void 
+; + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %add = atomicrmw add ptr addrspace(7) %ptr, i32 1 syncscope("wavefront") monotonic, align 4 + ret void +} + +declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>, i1) nounwind readnone + +!0 = !{i32 7} diff --git a/lgc/test/shaderdb/gfx12/gfx1200wavematrix-load-wave64.lgc b/lgc/test/shaderdb/gfx12/gfx1200wavematrix-load-wave64.lgc new file mode 100644 index 0000000000..178fd27c7a --- /dev/null +++ b/lgc/test/shaderdb/gfx12/gfx1200wavematrix-load-wave64.lgc @@ -0,0 +1,158 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc --mcpu=gfx1201 -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=GFX12 %s + +define <2 x i32> @test_i4_32X16_ab_layout(ptr addrspace(7) %ptr) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { +; GFX12-LABEL: @test_i4_32X16_ab_layout( +; GFX12-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP1]]) +; GFX12-NEXT: [[TMP3:%.*]] = srem i32 [[TMP2]], 16 +; GFX12-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP2]], 16 +; GFX12-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 8 +; GFX12-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 1 +; GFX12-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], 128 +; GFX12-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP6]] +; GFX12-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0 +; GFX12-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR:%.*]], i32 [[TMP9]] +; GFX12-NEXT: [[TMP11:%.*]] = load atomic i8, ptr addrspace(7) [[TMP10]] unordered, align 1 +; GFX12-NEXT: [[TMP12:%.*]] = add i32 [[TMP8]], 128 +; GFX12-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP12]] +; GFX12-NEXT: [[TMP14:%.*]] = load atomic i8, ptr addrspace(7) [[TMP13]] unordered, align 1 +; GFX12-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], 256 +; GFX12-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP15]] +; GFX12-NEXT: [[TMP17:%.*]] = load atomic i8, ptr addrspace(7) [[TMP16]] unordered, align 1 +; GFX12-NEXT: [[TMP18:%.*]] = add i32 [[TMP8]], 384 +; GFX12-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP18]] +; GFX12-NEXT: [[TMP20:%.*]] = load atomic i8, ptr addrspace(7) [[TMP19]] unordered, align 1 +; GFX12-NEXT: [[TMP21:%.*]] = add i32 [[TMP8]], 512 +; GFX12-NEXT: [[TMP22:%.*]] = 
getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP21]] +; GFX12-NEXT: [[TMP23:%.*]] = load atomic i8, ptr addrspace(7) [[TMP22]] unordered, align 1 +; GFX12-NEXT: [[TMP24:%.*]] = add i32 [[TMP8]], 640 +; GFX12-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP24]] +; GFX12-NEXT: [[TMP26:%.*]] = load atomic i8, ptr addrspace(7) [[TMP25]] unordered, align 1 +; GFX12-NEXT: [[TMP27:%.*]] = add i32 [[TMP8]], 768 +; GFX12-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP27]] +; GFX12-NEXT: [[TMP29:%.*]] = load atomic i8, ptr addrspace(7) [[TMP28]] unordered, align 1 +; GFX12-NEXT: [[TMP30:%.*]] = add i32 [[TMP8]], 896 +; GFX12-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP30]] +; GFX12-NEXT: [[TMP32:%.*]] = load atomic i8, ptr addrspace(7) [[TMP31]] unordered, align 1 +; GFX12-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP33]]) +; GFX12-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 1 +; GFX12-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP35]], 0 +; GFX12-NEXT: [[TMP37:%.*]] = and i8 [[TMP11]], 15 +; GFX12-NEXT: [[TMP38:%.*]] = and i8 [[TMP14]], 15 +; GFX12-NEXT: [[TMP39:%.*]] = shl i8 [[TMP38]], 4 +; GFX12-NEXT: [[TMP40:%.*]] = or i8 [[TMP37]], [[TMP39]] +; GFX12-NEXT: [[TMP41:%.*]] = and i8 [[TMP11]], -16 +; GFX12-NEXT: [[TMP42:%.*]] = lshr i8 [[TMP11]], 4 +; GFX12-NEXT: [[TMP43:%.*]] = and i8 [[TMP14]], -16 +; GFX12-NEXT: [[TMP44:%.*]] = or i8 [[TMP42]], [[TMP43]] +; GFX12-NEXT: [[TMP45:%.*]] = insertelement <4 x i8> poison, i8 [[TMP40]], i64 0 +; GFX12-NEXT: [[TMP46:%.*]] = insertelement <4 x i8> poison, i8 [[TMP44]], i64 0 +; GFX12-NEXT: [[TMP47:%.*]] = select i1 [[TMP36]], <4 x i8> [[TMP45]], <4 x i8> [[TMP46]] +; GFX12-NEXT: [[TMP48:%.*]] = and i8 [[TMP17]], 15 +; GFX12-NEXT: [[TMP49:%.*]] = and i8 [[TMP20]], 15 +; GFX12-NEXT: [[TMP50:%.*]] = shl i8 [[TMP49]], 4 +; GFX12-NEXT: [[TMP51:%.*]] = or 
i8 [[TMP48]], [[TMP50]] +; GFX12-NEXT: [[TMP52:%.*]] = and i8 [[TMP17]], -16 +; GFX12-NEXT: [[TMP53:%.*]] = lshr i8 [[TMP17]], 4 +; GFX12-NEXT: [[TMP54:%.*]] = and i8 [[TMP20]], -16 +; GFX12-NEXT: [[TMP55:%.*]] = or i8 [[TMP53]], [[TMP54]] +; GFX12-NEXT: [[TMP56:%.*]] = insertelement <4 x i8> [[TMP47]], i8 [[TMP51]], i64 1 +; GFX12-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> [[TMP47]], i8 [[TMP55]], i64 1 +; GFX12-NEXT: [[TMP58:%.*]] = select i1 [[TMP36]], <4 x i8> [[TMP56]], <4 x i8> [[TMP57]] +; GFX12-NEXT: [[TMP59:%.*]] = and i8 [[TMP23]], 15 +; GFX12-NEXT: [[TMP60:%.*]] = and i8 [[TMP26]], 15 +; GFX12-NEXT: [[TMP61:%.*]] = shl i8 [[TMP60]], 4 +; GFX12-NEXT: [[TMP62:%.*]] = or i8 [[TMP59]], [[TMP61]] +; GFX12-NEXT: [[TMP63:%.*]] = and i8 [[TMP23]], -16 +; GFX12-NEXT: [[TMP64:%.*]] = lshr i8 [[TMP23]], 4 +; GFX12-NEXT: [[TMP65:%.*]] = and i8 [[TMP26]], -16 +; GFX12-NEXT: [[TMP66:%.*]] = or i8 [[TMP64]], [[TMP65]] +; GFX12-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP62]], i64 2 +; GFX12-NEXT: [[TMP68:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP66]], i64 2 +; GFX12-NEXT: [[TMP69:%.*]] = select i1 [[TMP36]], <4 x i8> [[TMP67]], <4 x i8> [[TMP68]] +; GFX12-NEXT: [[TMP70:%.*]] = and i8 [[TMP29]], 15 +; GFX12-NEXT: [[TMP71:%.*]] = and i8 [[TMP32]], 15 +; GFX12-NEXT: [[TMP72:%.*]] = shl i8 [[TMP71]], 4 +; GFX12-NEXT: [[TMP73:%.*]] = or i8 [[TMP70]], [[TMP72]] +; GFX12-NEXT: [[TMP74:%.*]] = and i8 [[TMP29]], -16 +; GFX12-NEXT: [[TMP75:%.*]] = lshr i8 [[TMP29]], 4 +; GFX12-NEXT: [[TMP76:%.*]] = and i8 [[TMP32]], -16 +; GFX12-NEXT: [[TMP77:%.*]] = or i8 [[TMP75]], [[TMP76]] +; GFX12-NEXT: [[TMP78:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP73]], i64 3 +; GFX12-NEXT: [[TMP79:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP77]], i64 3 +; GFX12-NEXT: [[TMP80:%.*]] = select i1 [[TMP36]], <4 x i8> [[TMP78]], <4 x i8> [[TMP79]] +; GFX12-NEXT: [[TMP81:%.*]] = shufflevector <4 x i8> [[TMP80]], <4 x i8> poison, <8 x i32> +; GFX12-NEXT: 
[[TMP82:%.*]] = bitcast <8 x i8> [[TMP81]] to <2 x i32> +; GFX12-NEXT: [[TMP83:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP84:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP83]]) +; GFX12-NEXT: [[TMP85:%.*]] = srem i32 [[TMP84]], 16 +; GFX12-NEXT: [[TMP86:%.*]] = udiv i32 [[TMP84]], 16 +; GFX12-NEXT: [[TMP87:%.*]] = mul i32 [[TMP86]], 4 +; GFX12-NEXT: [[TMP88:%.*]] = mul i32 [[TMP85]], 128 +; GFX12-NEXT: [[TMP89:%.*]] = add i32 [[TMP87]], [[TMP88]] +; GFX12-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 0 +; GFX12-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP90]] +; GFX12-NEXT: [[TMP92:%.*]] = load atomic i8, ptr addrspace(7) [[TMP91]] unordered, align 16 +; GFX12-NEXT: [[TMP93:%.*]] = insertelement <4 x i8> poison, i8 [[TMP92]], i64 0 +; GFX12-NEXT: [[TMP94:%.*]] = add i32 [[TMP89]], 1 +; GFX12-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP94]] +; GFX12-NEXT: [[TMP96:%.*]] = load atomic i8, ptr addrspace(7) [[TMP95]] unordered, align 1 +; GFX12-NEXT: [[TMP97:%.*]] = insertelement <4 x i8> [[TMP93]], i8 [[TMP96]], i64 1 +; GFX12-NEXT: [[TMP98:%.*]] = add i32 [[TMP89]], 2 +; GFX12-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP98]] +; GFX12-NEXT: [[TMP100:%.*]] = load atomic i8, ptr addrspace(7) [[TMP99]] unordered, align 2 +; GFX12-NEXT: [[TMP101:%.*]] = insertelement <4 x i8> [[TMP97]], i8 [[TMP100]], i64 2 +; GFX12-NEXT: [[TMP102:%.*]] = add i32 [[TMP89]], 3 +; GFX12-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP102]] +; GFX12-NEXT: [[TMP104:%.*]] = load atomic i8, ptr addrspace(7) [[TMP103]] unordered, align 1 +; GFX12-NEXT: [[TMP105:%.*]] = insertelement <4 x i8> [[TMP101]], i8 [[TMP104]], i64 3 +; GFX12-NEXT: [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP105]], <4 x i8> poison, <8 x i32> +; GFX12-NEXT: [[TMP107:%.*]] = bitcast <8 x i8> [[TMP106]] to <2 x i32> +; GFX12-NEXT: ret <2 x i32> [[TMP82]] +; + %a = call 
<2 x i32> (...) @lgc.xdl.cooperative.matrix.load__v2i32(ptr addrspace(7) %ptr, i32 128, i1 false, i32 10, i32 5, i32 2, i32 16, i32 32) + %b = call <2 x i32> (...) @lgc.xdl.cooperative.matrix.load__v2i32(ptr addrspace(7) %ptr, i32 128, i1 true, i32 10, i32 5, i32 2, i32 16, i32 32) + ret <2 x i32> %a +} + +declare <2 x i32> @lgc.xdl.cooperative.matrix.load__v2i32(...) + +!llpc.compute.mode = !{!0} +!lgc.client = !{!1} +!lgc.options = !{!2} +!lgc.options.CS = !{!3} + +!0 = !{i32 128, i32 2, i32 1} +!1 = !{!"Vulkan"} +!2 = !{i32 -2108299168, i32 -1199997545, i32 1667044824, i32 -422575072, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 -1} +!3 = !{i32 219437737, i32 -1317595285, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} +!8 = !{i32 5} +!9 = !{i32 7} diff --git a/lgc/test/shaderdb/gfx12/gfx1200wavematrix-store-wave64.lgc b/lgc/test/shaderdb/gfx12/gfx1200wavematrix-store-wave64.lgc new file mode 100644 index 0000000000..795e190035 --- /dev/null +++ b/lgc/test/shaderdb/gfx12/gfx1200wavematrix-store-wave64.lgc @@ -0,0 +1,154 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 +; RUN: lgc --mcpu=gfx1201 -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=GFX12 %s + +define void @test_i4_32X16_ab_layout(ptr addrspace(7) %ptr, <2 x i32> %a) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { +; GFX12-LABEL: define void @test_i4_32X16_ab_layout +; GFX12-SAME: (ptr addrspace(7) [[PTR:%.*]], <2 x i32> [[A:%.*]]) !spirv.ExecutionModel [[META6:![0-9]+]] !lgc.shaderstage [[META7:![0-9]+]] { +; GFX12-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP1]]) +; GFX12-NEXT: [[TMP3:%.*]] = srem i32 [[TMP2]], 16 +; GFX12-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP2]], 16 +; GFX12-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 8 +; GFX12-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP3]], 1 +; GFX12-NEXT: [[TMP7:%.*]] = mul i32 [[TMP5]], 640 +; GFX12-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], [[TMP6]] +; GFX12-NEXT: [[TMP9:%.*]] = srem i32 [[TMP2]], 2 +; GFX12-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 640 +; GFX12-NEXT: [[TMP11:%.*]] = add i32 [[TMP8]], [[TMP10]] +; GFX12-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP12]]) +; GFX12-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[A]], i64 0 +; GFX12-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 [[TMP14]], i32 [[TMP14]], i32 1732584193, i32 -271733879, i1 false, i1 false) +; GFX12-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to <4 x i8> +; GFX12-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[A]], i64 1 +; GFX12-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.permlane16.i32(i32 [[TMP17]], i32 [[TMP17]], i32 1732584193, i32 -271733879, i1 false, i1 false) +; GFX12-NEXT: [[TMP19:%.*]] = 
bitcast i32 [[TMP18]] to <4 x i8> +; GFX12-NEXT: [[TMP20:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +; GFX12-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <4 x i32> +; GFX12-NEXT: [[TMP22:%.*]] = add i32 [[TMP11]], 0 +; GFX12-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP22]] +; GFX12-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP21]], i64 0 +; GFX12-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP16]], i64 0 +; GFX12-NEXT: [[TMP26:%.*]] = and i32 [[TMP13]], 1 +; GFX12-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP26]], 0 +; GFX12-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i8 15, i8 -16 +; GFX12-NEXT: [[TMP29:%.*]] = and i8 [[TMP24]], [[TMP28]] +; GFX12-NEXT: [[TMP30:%.*]] = and i8 [[TMP25]], [[TMP28]] +; GFX12-NEXT: [[TMP31:%.*]] = shl i8 [[TMP30]], 4 +; GFX12-NEXT: [[TMP32:%.*]] = lshr i8 [[TMP30]], 4 +; GFX12-NEXT: [[TMP33:%.*]] = select i1 [[TMP27]], i8 [[TMP31]], i8 [[TMP32]] +; GFX12-NEXT: [[TMP34:%.*]] = or i8 [[TMP29]], [[TMP33]] +; GFX12-NEXT: store i8 [[TMP34]], ptr addrspace(7) [[TMP23]], align 1 +; GFX12-NEXT: [[TMP35:%.*]] = add i32 [[TMP11]], 1280 +; GFX12-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP35]] +; GFX12-NEXT: [[TMP37:%.*]] = extractelement <4 x i8> [[TMP21]], i64 1 +; GFX12-NEXT: [[TMP38:%.*]] = extractelement <4 x i8> [[TMP16]], i64 1 +; GFX12-NEXT: [[TMP39:%.*]] = and i32 [[TMP13]], 1 +; GFX12-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP39]], 0 +; GFX12-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i8 15, i8 -16 +; GFX12-NEXT: [[TMP42:%.*]] = and i8 [[TMP37]], [[TMP41]] +; GFX12-NEXT: [[TMP43:%.*]] = and i8 [[TMP38]], [[TMP41]] +; GFX12-NEXT: [[TMP44:%.*]] = shl i8 [[TMP43]], 4 +; GFX12-NEXT: [[TMP45:%.*]] = lshr i8 [[TMP43]], 4 +; GFX12-NEXT: [[TMP46:%.*]] = select i1 [[TMP40]], i8 [[TMP44]], i8 [[TMP45]] +; GFX12-NEXT: [[TMP47:%.*]] = or i8 [[TMP42]], [[TMP46]] +; GFX12-NEXT: store i8 [[TMP47]], ptr addrspace(7) [[TMP36]], align 1 +; GFX12-NEXT: 
[[TMP48:%.*]] = add i32 [[TMP11]], 2560 +; GFX12-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP48]] +; GFX12-NEXT: [[TMP50:%.*]] = extractelement <4 x i8> [[TMP21]], i64 2 +; GFX12-NEXT: [[TMP51:%.*]] = extractelement <4 x i8> [[TMP16]], i64 2 +; GFX12-NEXT: [[TMP52:%.*]] = and i32 [[TMP13]], 1 +; GFX12-NEXT: [[TMP53:%.*]] = icmp eq i32 [[TMP52]], 0 +; GFX12-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i8 15, i8 -16 +; GFX12-NEXT: [[TMP55:%.*]] = and i8 [[TMP50]], [[TMP54]] +; GFX12-NEXT: [[TMP56:%.*]] = and i8 [[TMP51]], [[TMP54]] +; GFX12-NEXT: [[TMP57:%.*]] = shl i8 [[TMP56]], 4 +; GFX12-NEXT: [[TMP58:%.*]] = lshr i8 [[TMP56]], 4 +; GFX12-NEXT: [[TMP59:%.*]] = select i1 [[TMP53]], i8 [[TMP57]], i8 [[TMP58]] +; GFX12-NEXT: [[TMP60:%.*]] = or i8 [[TMP55]], [[TMP59]] +; GFX12-NEXT: store i8 [[TMP60]], ptr addrspace(7) [[TMP49]], align 1 +; GFX12-NEXT: [[TMP61:%.*]] = add i32 [[TMP11]], 3840 +; GFX12-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP61]] +; GFX12-NEXT: [[TMP63:%.*]] = extractelement <4 x i8> [[TMP21]], i64 3 +; GFX12-NEXT: [[TMP64:%.*]] = extractelement <4 x i8> [[TMP16]], i64 3 +; GFX12-NEXT: [[TMP65:%.*]] = and i32 [[TMP13]], 1 +; GFX12-NEXT: [[TMP66:%.*]] = icmp eq i32 [[TMP65]], 0 +; GFX12-NEXT: [[TMP67:%.*]] = select i1 [[TMP66]], i8 15, i8 -16 +; GFX12-NEXT: [[TMP68:%.*]] = and i8 [[TMP63]], [[TMP67]] +; GFX12-NEXT: [[TMP69:%.*]] = and i8 [[TMP64]], [[TMP67]] +; GFX12-NEXT: [[TMP70:%.*]] = shl i8 [[TMP69]], 4 +; GFX12-NEXT: [[TMP71:%.*]] = lshr i8 [[TMP69]], 4 +; GFX12-NEXT: [[TMP72:%.*]] = select i1 [[TMP66]], i8 [[TMP70]], i8 [[TMP71]] +; GFX12-NEXT: [[TMP73:%.*]] = or i8 [[TMP68]], [[TMP72]] +; GFX12-NEXT: store i8 [[TMP73]], ptr addrspace(7) [[TMP62]], align 1 +; GFX12-NEXT: [[TMP74:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP75:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP74]]) +; GFX12-NEXT: [[TMP76:%.*]] = srem i32 [[TMP75]], 16 +; GFX12-NEXT: 
[[TMP77:%.*]] = udiv i32 [[TMP75]], 16 +; GFX12-NEXT: [[TMP78:%.*]] = mul i32 [[TMP77]], 4 +; GFX12-NEXT: [[TMP79:%.*]] = mul i32 [[TMP76]], 640 +; GFX12-NEXT: [[TMP80:%.*]] = add i32 [[TMP78]], [[TMP79]] +; GFX12-NEXT: [[TMP81:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +; GFX12-NEXT: [[TMP82:%.*]] = shufflevector <8 x i8> [[TMP81]], <8 x i8> poison, <4 x i32> +; GFX12-NEXT: [[TMP83:%.*]] = add i32 [[TMP80]], 0 +; GFX12-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP83]] +; GFX12-NEXT: [[TMP85:%.*]] = extractelement <4 x i8> [[TMP82]], i64 0 +; GFX12-NEXT: store i8 [[TMP85]], ptr addrspace(7) [[TMP84]], align 16 +; GFX12-NEXT: [[TMP86:%.*]] = add i32 [[TMP80]], 1 +; GFX12-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP86]] +; GFX12-NEXT: [[TMP88:%.*]] = extractelement <4 x i8> [[TMP82]], i64 1 +; GFX12-NEXT: store i8 [[TMP88]], ptr addrspace(7) [[TMP87]], align 1 +; GFX12-NEXT: [[TMP89:%.*]] = add i32 [[TMP80]], 2 +; GFX12-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP89]] +; GFX12-NEXT: [[TMP91:%.*]] = extractelement <4 x i8> [[TMP82]], i64 2 +; GFX12-NEXT: store i8 [[TMP91]], ptr addrspace(7) [[TMP90]], align 2 +; GFX12-NEXT: [[TMP92:%.*]] = add i32 [[TMP80]], 3 +; GFX12-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(7) [[PTR]], i32 [[TMP92]] +; GFX12-NEXT: [[TMP94:%.*]] = extractelement <4 x i8> [[TMP82]], i64 3 +; GFX12-NEXT: store i8 [[TMP94]], ptr addrspace(7) [[TMP93]], align 1 +; GFX12-NEXT: ret void +; + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 false, i32 10, i32 5, i32 0, i32 16, <2 x i32> %a, i32 32) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 true, i32 10, i32 5, i32 0, i32 16, <2 x i32> %a, i32 32) + ret void +} + +declare void @lgc.xdl.cooperative.matrix.store(...) 
+ +!llpc.compute.mode = !{!0} +!lgc.client = !{!1} +!lgc.options = !{!2} +!lgc.options.CS = !{!3} + +!0 = !{i32 128, i32 2, i32 1} +!1 = !{!"Vulkan"} +!2 = !{i32 -2108299168, i32 -1199997545, i32 1667044824, i32 -422575072, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 -1} +!3 = !{i32 219437737, i32 -1317595285, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} +!8 = !{i32 5} +!9 = !{i32 7} diff --git a/lgc/test/shaderdb/gfx12/gfx1200wavematrix.lgc b/lgc/test/shaderdb/gfx12/gfx1200wavematrix.lgc new file mode 100644 index 0000000000..ee83153d21 --- /dev/null +++ b/lgc/test/shaderdb/gfx12/gfx1200wavematrix.lgc @@ -0,0 +1,116 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 +; RUN: lgc --mcpu=gfx1201 -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=GFX12 %s + +define <2 x i32> @convert_f16_to_fp8(<4 x float> %load) { +; GFX12-LABEL: define <2 x i32> @convert_f16_to_fp8 +; GFX12-SAME: (<4 x float> [[LOAD:%.*]]) { +; GFX12-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[LOAD]] to <8 x half> +; GFX12-NEXT: [[CASTOPCONVERT:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> +; GFX12-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 0 +; GFX12-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 1 +; GFX12-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float [[TMP3]], float [[TMP4]], i32 0, i1 false) +; GFX12-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 2 +; GFX12-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 3 +; GFX12-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float [[TMP6]], float [[TMP7]], i32 [[TMP5]], i1 true) +; GFX12-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i64 0 +; GFX12-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 4 +; GFX12-NEXT: [[TMP11:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 5 +; GFX12-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float [[TMP10]], float [[TMP11]], i32 0, i1 false) +; GFX12-NEXT: [[TMP13:%.*]] = extractelement <8 x float> 
[[CASTOPCONVERT]], i64 6 +; GFX12-NEXT: [[TMP14:%.*]] = extractelement <8 x float> [[CASTOPCONVERT]], i64 7 +; GFX12-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float [[TMP13]], float [[TMP14]], i32 [[TMP12]], i1 true) +; GFX12-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP15]], i64 1 +; GFX12-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP16]] to <8 x i8> +; GFX12-NEXT: [[TMP18:%.*]] = bitcast <8 x i8> [[TMP17]] to <2 x i32> +; GFX12-NEXT: ret <2 x i32> [[TMP18]] +; + %convert = call <2 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v2i32(i32 45, <4 x float> %load, i32 1, i32 8, i32 4, i32 4) + ret <2 x i32> %convert +} + +define <4 x float> @convert_fp8_to_fp16(<2 x i32> %load) { +; GFX12-LABEL: define <4 x float> @convert_fp8_to_fp16 +; GFX12-SAME: (<2 x i32> [[LOAD:%.*]]) { +; GFX12-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX12-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[LOAD]] to <8 x i8> +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +; GFX12-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i64 0 +; GFX12-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 [[TMP4]], i1 false) +; GFX12-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 [[TMP4]], i1 true) +; GFX12-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP6]], <4 x i32> +; GFX12-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP3]], i64 1 +; GFX12-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 [[TMP8]], i1 false) +; GFX12-NEXT: [[TMP10:%.*]] = call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 [[TMP8]], i1 true) +; GFX12-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <4 x i32> +; GFX12-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP11]], <8 x i32> +; GFX12-NEXT: [[CASTOPCONVERT:%.*]] = fptrunc <8 x float> [[TMP12]] to <8 x half> +; GFX12-NEXT: [[TMP13:%.*]] = bitcast <8 x 
half> [[CASTOPCONVERT]] to <4 x float> +; GFX12-NEXT: ret <4 x float> [[TMP13]] +; + %convert = call <4 x float> (...) @lgc.xdl.cooperative.matrix.convert__v4f32(i32 46, <2 x i32> %load, i32 8, i32 1, i32 4, i32 4) + ret <4 x float> %convert +} + +define <8 x float> @muladd_(<2 x i32> %load, <2 x i32> %load1, <8 x float> %load2) { +; GFX12-LABEL: define <8 x float> @muladd_ +; GFX12-SAME: (<2 x i32> [[LOAD:%.*]], <2 x i32> [[LOAD1:%.*]], <8 x float> [[LOAD2:%.*]]) { +; GFX12-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[LOAD]], <2 x i32> poison, <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[LOAD1]], <2 x i32> poison, <2 x i32> +; GFX12-NEXT: [[MULADD1:%.*]] = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <8 x float> [[LOAD2]]) +; GFX12-NEXT: ret <8 x float> [[MULADD1]] +; + %mulAdd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<2 x i32> %load, <2 x i32> %load1, <8 x float> %load2, i1 true, i1 true, i1 false, i1 false, i32 8, i32 9, i32 2, i32 2, i32 1) + ret <8 x float> %mulAdd +} + +define <4 x i32> @muladd_16x16x16_iu4(i32 %loadA, i32 %loadB, <4 x i32> %loadC) { +; GFX12-LABEL: define <4 x i32> @muladd_16x16x16_iu4 +; GFX12-SAME: (i32 [[LOADA:%.*]], i32 [[LOADB:%.*]], <4 x i32> [[LOADC:%.*]]) { +; GFX12-NEXT: [[MULADD1:%.*]] = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[LOADA]], i1 true, i32 [[LOADB]], <4 x i32> [[LOADC]], i1 false) +; GFX12-NEXT: ret <4 x i32> [[MULADD1]] +; + %mulAdd = call <4 x i32> (...) 
@lgc.xdl.cooperative.matrix.muladd__v4i32(i32 %loadA, i32 %loadB, <4 x i32> %loadC, i1 true, i1 true, i1 false, i1 false, i32 10, i32 10, i32 5, i32 5, i32 1) + ret <4 x i32> %mulAdd +} + +define <4 x i32> @muladd_16x16x32_iu4(i32 %loadA, i32 %loadB, <4 x i32> %loadC) { +; GFX12-LABEL: define <4 x i32> @muladd_16x16x32_iu4 +; GFX12-SAME: (i32 [[LOADA:%.*]], i32 [[LOADB:%.*]], <4 x i32> [[LOADC:%.*]]) { +; GFX12-NEXT: [[MULADD1:%.*]] = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[LOADA]], i1 true, i32 [[LOADB]], <4 x i32> [[LOADC]], i1 false) +; GFX12-NEXT: ret <4 x i32> [[MULADD1]] +; + %mulAdd = call <4 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v4i32(i32 %loadA, i32 %loadB, <4 x i32> %loadC, i1 true, i1 true, i1 false, i1 false, i32 10, i32 10, i32 5, i32 5, i32 2) + ret <4 x i32> %mulAdd +} + +declare <2 x i32> @lgc.xdl.cooperative.matrix.convert__v2i32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) +declare <4 x float> @lgc.xdl.cooperative.matrix.convert__v4f32(...) +declare <4 x i32> @lgc.xdl.cooperative.matrix.muladd__v4i32(...) diff --git a/lgc/test/shaderdb/gfx12/lit.local.cfg b/lgc/test/shaderdb/gfx12/lit.local.cfg new file mode 100644 index 0000000000..2c5115503d --- /dev/null +++ b/lgc/test/shaderdb/gfx12/lit.local.cfg @@ -0,0 +1,27 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +if "llpc_build_gfx12" not in config.available_features: + config.unsupported = True diff --git a/lgc/test/shaderdb/gfx12/packed-accumulators-gfx12.lgc b/lgc/test/shaderdb/gfx12/packed-accumulators-gfx12.lgc new file mode 100644 index 0000000000..41159c48f6 --- /dev/null +++ b/lgc/test/shaderdb/gfx12/packed-accumulators-gfx12.lgc @@ -0,0 +1,739 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 +; RUN: lgc --mcpu=gfx1201 -o - -passes='require<lgc-pipeline-state>,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=GFX12 %s + +define void @matmul_f16_pack_simple(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_simple +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...)
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + ret void +} + +define void @matmul_f16_pack_chain_sequential(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_chain_sequential +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_pack_chain_alternating(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_chain_alternating +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_pack_chain_nested(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_chain_nested +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_no_packable_chain(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0) { +; GFX12-LABEL: define void @matmul_f16_no_packable_chain +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_1]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.1) + ret void +} + +define void @matmul_f16_chain_loop(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; +; GFX12-LABEL: define void @matmul_f16_chain_loop +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[LOOP:%.*]] +; GFX12: loop: +; GFX12-NEXT: [[ACCUM0_PHI:%.*]] = phi <8 x float> [ [[CHAIN0_1]], [[ENTRY:%.*]] ], [ [[CHAIN0_2:%.*]], [[LOOP]] ] +; GFX12-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[CHAIN1_1]], [[ENTRY]] ], [ [[CHAIN1_2:%.*]], [[LOOP]] ] +; GFX12-NEXT: [[CHAIN0_2]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM0_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CC:%.*]] = call i1 @getcc() +; GFX12-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + br label %loop + +loop: + %accum0.phi = phi <8 x float> [ %chain0.1, %entry ], [ %chain0.2, %loop ] + %accum1.phi = phi <8 x float> [ %chain1.1, %entry ], [ %chain1.2, %loop ] + + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + + %cc = call i1 @getcc() + br i1 %cc, label %loop, label %end + +end: + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_chain_loop_phis(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) {; +; GFX12-LABEL: define void @matmul_f16_chain_loop_phis +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX12-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX12-NEXT: br label [[HEADER:%.*]] +; GFX12: header: +; GFX12-NEXT: [[ACCUM0_PHI:%.*]] = phi <8 x float> [ [[ACCUM0_LOAD]], [[ENTRY:%.*]] ], [ [[MULADDLO:%.*]], [[LOOP:%.*]] ] +; GFX12-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[ACCUM1_LOAD]], [[ENTRY]] ], [ [[MULADDHI:%.*]], [[LOOP]] ] +; GFX12-NEXT: [[CC:%.*]] = call i1 @getcc() +; GFX12-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; GFX12: loop: +; GFX12-NEXT: [[MULADDLO]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM0_PHI]], i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[HEADER]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[ACCUM0_PHI]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[ACCUM1_PHI]]) +; GFX12-NEXT: ret void +; +entry: + %accum0.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 + %accum1.load = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 + br label %header + +header: + %accum0.phi = phi <8 x float> [ %accum0.load, %entry ], [ %accum0.next, %loop ] + %accum1.phi = phi <8 x float> [ %accum1.load, %entry ], [ %accum1.next, %loop ] + %cc = call i1 @getcc() + br i1 %cc, label %loop, label %end + +loop: + %accum0.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) + %accum1.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum0.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) + %accum1.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) + + br label %header + +end: + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) #2 + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) #2 + ret void +} + +define void @matmul_f16_chain_branch(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_chain_branch +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CC:%.*]] = call i1 @getcc() +; GFX12-NEXT: br i1 [[CC]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; GFX12: if_true: +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[END:%.*]] +; GFX12: if_false: +; GFX12-NEXT: [[A_FALSE:%.*]] = call <8 x float> @getmat1() +; GFX12-NEXT: [[B_FALSE:%.*]] = call <8 x float> @getmat1() +; GFX12-NEXT: [[CHAIN0_3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_3:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[END]] +; GFX12: end: +; GFX12-NEXT: [[ACCUM0_PHI:%.*]] = phi <8 x float> [ [[CHAIN0_2]], [[IF_TRUE]] ], [ [[CHAIN0_3]], [[IF_FALSE]] ] +; GFX12-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[CHAIN1_2]], [[IF_TRUE]] ], [ [[CHAIN1_3]], [[IF_FALSE]] ] +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM0_PHI]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM1_PHI]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %cc = call i1 @getcc() + br i1 %cc, label %if_true, label %if_false + +if_true: + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + + br label %end +if_false: + %a.false = call <8 x float> @getmat1() + %b.false = call <8 x float> @getmat1() + + %chain0.3 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.3 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + + br label %end + +end: + %accum0.phi = phi <8 x float> [ %chain0.2, %if_true ], [ %chain0.3, %if_false ] + %accum1.phi = phi <8 x float> [ %chain1.2, %if_true ], [ %chain1.3, %if_false ] + + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) + ret void +} + +define void @matmul_f16_chain_diff_bbs(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_chain_diff_bbs +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CC:%.*]] = call i1 @getcc() +; GFX12-NEXT: br label [[CONT:%.*]] +; GFX12: cont: +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %cc = call i1 @getcc() + br label %cont +cont: + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + + br label %end +end: + + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_pack_loop(ptr %out0, ptr %out1) { +; GFX12-LABEL: define void @matmul_f16_pack_loop +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16, i32 1) +; GFX12-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16, i32 1) +; GFX12-NEXT: br label [[LOOP:%.*]] +; GFX12: loop: +; GFX12-NEXT: [[ACCUM0_PHI:%.*]] = phi <8 x float> [ [[ACCUM0_LOAD]], [[ENTRY:%.*]] ], [ [[MULADDLO:%.*]], [[LOOP]] ] +; GFX12-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[ACCUM1_LOAD]], [[ENTRY]] ], [ [[MULADDHI:%.*]], [[LOOP]] ] +; GFX12-NEXT: [[A:%.*]] = call <8 x float> @getmat1() +; GFX12-NEXT: [[B:%.*]] = call <8 x float> @getmat1() +; GFX12-NEXT: [[MULADDLO]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM0_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CC:%.*]] = call i1 @getcc() +; GFX12-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADDHI]]) +; GFX12-NEXT: ret void +; +entry: + %accum0.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, i32 1) + %accum1.load = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, i32 1) + br label %loop + +loop: + %accum0.phi = phi <8 x float> [ %accum0.load, %entry ], [ %accum0.next, %loop ] + %accum1.phi = phi <8 x float> [ %accum1.load, %entry ], [ %accum1.next, %loop ] + + %a = call <8 x float> @getmat1() + %b = call <8 x float> @getmat1() + + %accum0.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) + %accum1.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum0.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) + %accum1.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) + + %cc = call i1 @getcc() + br i1 %cc, label %loop, label %end + +end: + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.next) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.next) + ret void +} + +define void @matmul_f16_pack_scalar_same(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_scalar_same +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_scalar_different(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_scalar_different +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half 0xH3100, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_scalar_only_lo(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_scalar_only_lo +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + ret void +} + +define void @matmul_f16_pack_scalar_only_hi(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_scalar_only_hi +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half 0xH3100, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_scalar_diff_bbs(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_scalar_diff_bbs +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[SCALE_LO:%.*]] +; GFX12: scale_lo: +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: br label [[SCALE_HI:%.*]] +; GFX12: scale_hi: +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: br label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + br label %scale_lo + +scale_lo: + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + br label %scale_hi + +scale_hi: + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + br label %end + +end: + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_user_between_scalar(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_user_between_scalar +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[SCALE:%.*]] +; GFX12: scale: +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: br label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + br label %scale + +scale: + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + br label %end + +end: + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_factor_between_scalar(ptr %in, ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_pack_factor_between_scalar +; GFX12-SAME: (ptr [[IN:%.*]], ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: br label [[SCALE:%.*]] +; GFX12: scale: +; GFX12-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDLO]], half 0xH310F, i32 1, i32 1) +; GFX12-NEXT: [[FACTORHI:%.*]] = load half, ptr [[IN]], align 2 +; GFX12-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], half [[FACTORHI]], i32 1, i32 1) +; GFX12-NEXT: br label [[END:%.*]] +; GFX12: end: +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + br label %scale + +scale: + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %factorHi = load half, ptr %in + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half %factorHi, i32 1, i32 1) + br label %end + +end: + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + ret void +} + +define void @matmul_f16_pack_binop_fadd(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1, <8 x float> %c2, <8 x float> %c3) { +; GFX12-LABEL: define void @matmul_f16_pack_binop_fadd +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C3]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO0]], <8 x float> [[MULADDLO1]], i32 1, i32 1) +; GFX12-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDHI0]], <8 x float> [[MULADDHI1]], i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + ret void +} + +define void @matmul_f16_pack_binop_incompatible_matrices(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1, <8 x float> %c2, <8 x float> %c3) { +; GFX12-LABEL: define void @matmul_f16_pack_binop_incompatible_matrices +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C3]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO0]], <8 x float> [[MULADDLO1]], i32 1, i32 1) +; GFX12-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDHI0]], <8 x float> [[MULADDHI0]], i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi0, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + ret void +} + +define void @matmul_f16_pack_binop_incompatible_arithop(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1, <8 x float> %c2, <8 x float> %c3) { +; GFX12-LABEL: define void @matmul_f16_pack_binop_incompatible_arithop +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C3]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO0]], <8 x float> [[MULADDLO1]], i32 1, i32 1) +; GFX12-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 3, <8 x float> [[MULADDHI0]], <8 x float> [[MULADDHI1]], i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 3, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + ret void +} + +define void @matmul_f16_unpack_before_convert(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_unpack_before_convert +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CONVERTLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[MULADDLO0]], i32 1, i32 1, i32 1, i32 0) +; GFX12-NEXT: [[CONVERTHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[MULADDHI0]], i32 1, i32 1, i32 1, i32 0) +; GFX12-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[CONVERTLO]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[CONVERTHI]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO1]]) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI1]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %convertLo = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo0, i32 1, i32 1, i32 1, i32 0) + %convertHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi0, i32 1, i32 1, i32 1, i32 0) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %convertLo, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %convertHi, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi1) + ret void +} + +define void @matmul_f32_no_pack(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f32_no_pack +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdHi) + ret void +} + +define void @matmul_f16_modified_accumulator(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_modified_accumulator +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[ACCUM_C2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO]], <8 x float> [[C1]], i32 1, i32 1) +; GFX12-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO]]) +; GFX12-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI]]) +; GFX12-NEXT: ret void +; +entry: + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum.c2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo, <8 x float> %c1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + ret void +} + +define void @matmul_f16_store_between_muladds(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_store_between_muladds +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +define void @matmul_f16_store_within_chain(ptr %out0, ptr %out1, <8 x float> %a, <8 x float> %b, <8 x float> %c0, <8 x float> %c1) { +; GFX12-LABEL: define void @matmul_f16_store_within_chain +; GFX12-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { +; GFX12-NEXT: entry: +; GFX12-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX12-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX12-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX12-NEXT: ret void +; +entry: + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + ret void +} + +declare i1 @getcc() +declare <8 x float> @getmat1() + +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.times.scalar__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.binop__v8f32(...) diff --git a/lgc/test/shaderdb/gfx12/s_buffer_load-conversion-gfx12.lgc b/lgc/test/shaderdb/gfx12/s_buffer_load-conversion-gfx12.lgc new file mode 100644 index 0000000000..303d0c9802 --- /dev/null +++ b/lgc/test/shaderdb/gfx12/s_buffer_load-conversion-gfx12.lgc @@ -0,0 +1,336 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 +; RUN: lgc --mcpu=gfx1201 -o - -passes="require,module(lgc-lower-desc),module(lgc-mutate-entry-point),function(lgc-lower-buffer-operations)" %s | FileCheck --check-prefixes=GFX12 %s + +define amdgpu_kernel void @strided_buffer_uniform_strided_load(<4 x i32> %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_uniform_strided_load( +; GFX12-SAME: <4 x i32> [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META7:![0-9]+]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA4]], i64 0 +; GFX12-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA5]], i64 1 +; GFX12-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX12-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> 
+; GFX12-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; GFX12-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX12-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX12-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX12-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 838946732, i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP15]], i64 1 +; GFX12-NEXT: [[TMP17:%.*]] = lshr i32 [[TMP16]], 16 +; GFX12-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 16383 +; GFX12-NEXT: [[TMP19:%.*]] = mul i32 24, [[TMP18]] +; GFX12-NEXT: [[TMP20:%.*]] = add i32 0, [[TMP19]] +; GFX12-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP15]], i32 [[TMP20]], i32 0), !invariant.load [[META8:![0-9]+]] +; GFX12-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; GFX12-NEXT: store float [[TMP22]], ptr [[OUT]], align 4 +; GFX12-NEXT: ret void +; +entry: + %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) + %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) + %res = load float, ptr addrspace(9) %buf.idx, align 4 + store float %res, ptr %out, align 4 + ret void +} + +define amdgpu_kernel void @strided_buffer_uniform_strided_load_f16(<4 x i32> %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_uniform_strided_load_f16( +; GFX12-SAME: <4 x i32> [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef 
[[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA4]], i64 0 +; GFX12-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA5]], i64 1 +; GFX12-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX12-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; GFX12-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX12-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX12-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX12-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 838946732, i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call i16 @llvm.amdgcn.struct.buffer.load.i16{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 0, i32 0, i32 0) +; GFX12-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP16]] to half +; GFX12-NEXT: store half [[TMP17]], ptr [[OUT]], align 2 +; GFX12-NEXT: ret void +; +entry: + %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, 
i32 0, i32 0, i32 4, i32 16) + %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) + %res = load half, ptr addrspace(9) %buf.idx, align 2 + store half %res, ptr %out, align 2 + ret void +} + +define amdgpu_kernel void @strided_buffer_uniform_strided_load_i8(<4 x i32> %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_uniform_strided_load_i8( +; GFX12-SAME: <4 x i32> [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA4]], i64 0 +; GFX12-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA5]], i64 1 +; GFX12-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX12-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; GFX12-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> 
[[TMP8]], i64 1 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX12-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX12-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX12-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 838946732, i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 0, i32 0, i32 0) +; GFX12-NEXT: store i8 [[TMP16]], ptr [[OUT]], align 1 +; GFX12-NEXT: ret void +; +entry: + %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) + %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) + %res = load i8, ptr addrspace(9) %buf.idx, align 1 + store i8 %res, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @strided_buffer_uniform_strided_load_v4i32(<4 x i32> %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_uniform_strided_load_v4i32( +; GFX12-SAME: <4 x i32> [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; 
GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA4]], i64 0 +; GFX12-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA5]], i64 1 +; GFX12-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX12-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; GFX12-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX12-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX12-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX12-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 838946732, i64 3 +; GFX12-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP15]], i64 1 +; GFX12-NEXT: [[TMP17:%.*]] = lshr i32 [[TMP21]], 16 +; GFX12-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 16383 +; GFX12-NEXT: [[TMP19:%.*]] = mul i32 24, [[TMP18]] +; GFX12-NEXT: [[TMP20:%.*]] = add i32 0, [[TMP19]] +; GFX12-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> [[TMP15]], i32 [[TMP20]], i32 0), !invariant.load [[META8]] +; GFX12-NEXT: store <4 x i32> [[TMP16]], ptr [[OUT]], align 16 +; GFX12-NEXT: ret void +; +entry: + %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) + %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) + %res = load <4 x i32>, ptr addrspace(9) %buf.idx, align 16 + 
store <4 x i32> %res, ptr %out, align 16 + ret void +} + +define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load( +; GFX12-SAME: <4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[DESC]], i64 1 +; GFX12-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], -1073676289 +; GFX12-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 786432 +; GFX12-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[DESC]], i32 [[TMP7]], i64 1 +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 +; GFX12-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 12 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 2 +; GFX12-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i64 3 +; GFX12-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 +; GFX12-NEXT: [[TMP14:%.*]] = or i32 
[[TMP13]], 268435456 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[DESC]], i32 add (i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 288), i32 0), !invariant.load [[META8]] +; GFX12-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP16]] to float +; GFX12-NEXT: store float [[TMP17]], ptr [[OUT]], align 4 +; GFX12-NEXT: ret void +; +entry: + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %146 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %ptr) + %buf.off = getelementptr inbounds i8, ptr addrspace(7) %ptr, i32 8 + %buf.cnv = call ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7) %buf.off, i32 12) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf.cnv, i32 24) + %res = load float, ptr addrspace(9) %buf.idx, align 4 + store float %res, ptr %out, align 4 + ret void +} + +define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load_f16(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load_f16( +; GFX12-SAME: <4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; 
GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[DESC]], i64 1 +; GFX12-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], -1073676289 +; GFX12-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 786432 +; GFX12-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[DESC]], i32 [[TMP7]], i64 1 +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 +; GFX12-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 12 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 2 +; GFX12-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i64 3 +; GFX12-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 +; GFX12-NEXT: [[TMP14:%.*]] = or i32 [[TMP13]], 268435456 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call i16 @llvm.amdgcn.struct.buffer.load.i16{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 0, i32 0) +; GFX12-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP16]] to half +; GFX12-NEXT: store half [[TMP17]], ptr [[OUT]], align 2 +; GFX12-NEXT: ret void +; +entry: + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %146 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %ptr) + %buf.off = getelementptr inbounds i8, ptr addrspace(7) %ptr, i32 8 + %buf.cnv = call ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7) %buf.off, i32 12) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf.cnv, i32 24) + %res = load half, ptr addrspace(9) %buf.idx, align 2 + store half %res, ptr 
%out, align 2 + ret void +} + +define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load_i8(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load_i8( +; GFX12-SAME: <4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[DESC]], i64 1 +; GFX12-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], -1073676289 +; GFX12-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 786432 +; GFX12-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[DESC]], i32 [[TMP7]], i64 1 +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 +; GFX12-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 12 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 2 +; GFX12-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i64 3 +; GFX12-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 +; GFX12-NEXT: [[TMP14:%.*]] = or i32 [[TMP13]], 268435456 +; 
GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 0, i32 0) +; GFX12-NEXT: store i8 [[TMP16]], ptr [[OUT]], align 1 +; GFX12-NEXT: ret void +; +entry: + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %146 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %ptr) + %buf.off = getelementptr inbounds i8, ptr addrspace(7) %ptr, i32 8 + %buf.cnv = call ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7) %buf.off, i32 12) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf.cnv, i32 24) + %res = load i8, ptr addrspace(9) %buf.idx, align 1 + store i8 %res, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load_v4i32(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX12-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load_v4i32( +; GFX12-SAME: <4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] !lgc.shaderstage [[META7]] { +; GFX12-NEXT: [[ENTRY:.*:]] +; GFX12-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX12-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to 
<2 x i32> +; GFX12-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX12-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX12-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX12-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[DESC]], i64 1 +; GFX12-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], -1073676289 +; GFX12-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 786432 +; GFX12-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[DESC]], i32 [[TMP7]], i64 1 +; GFX12-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 +; GFX12-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 12 +; GFX12-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 2 +; GFX12-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i64 3 +; GFX12-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 +; GFX12-NEXT: [[TMP14:%.*]] = or i32 [[TMP13]], 268435456 +; GFX12-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 +; GFX12-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> [[DESC]], i32 add (i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 288), i32 0), !invariant.load [[META8]] +; GFX12-NEXT: store <4 x i32> [[TMP16]], ptr [[OUT]], align 16 +; GFX12-NEXT: ret void +; +entry: + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %146 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %ptr) + %buf.off = getelementptr inbounds i8, ptr addrspace(7) %ptr, i32 8 + %buf.cnv = call ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7) %buf.off, i32 12) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf.cnv, i32 24) + %res = load <4 x i32>, ptr addrspace(9) %buf.idx, align 16 + store <4 x i32> %res, ptr %out, align 16 + ret void +} + +declare ptr addrspace(9) @lgc.load.strided.buffer.desc(i64, i32, i32, i32, i32) #0 +declare ptr 
addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>, i1) nounwind readnone +declare ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7), i32) +declare ptr addrspace(7) @lgc.buffer.load.desc.to.ptr(ptr addrspace(4), i1, i1) nounwind readnone +declare ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9), i32) #0 + +attributes #0 = { nounwind willreturn memory(none) } + +!llpc.compute.mode = !{!0} + +!lgc.user.data.nodes = !{!1, !2, !3} + +!0 = !{i32 16, i32 16, i32 1} +!1 = !{!"DescriptorTableVaPtr", i32 7, i32 255, i32 3, i32 1, i32 1} +!2 = !{!"DescriptorMutable", i32 17, i32 0, i32 0, i32 40, i64 4294967296, i32 0, i32 8} +!3 = !{!"DescriptorConstBufferCompact", i32 15, i32 255, i32 4, i32 2, i64 8589934592, i32 0, i32 2} +!4 = !{i32 7} +;. +; GFX12: [[META7]] = !{i32 7} +; GFX12: [[META8]] = !{} +;. diff --git a/lgc/util/GfxRegHandler.cpp b/lgc/util/GfxRegHandler.cpp index f24e8561cd..d30297c0d7 100644 --- a/lgc/util/GfxRegHandler.cpp +++ b/lgc/util/GfxRegHandler.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -115,6 +115,9 @@ SqImgSampRegHandler::SqImgSampRegHandler(IRBuilder<> *builder, Value *reg, GfxIp switch (gfxIpVersion->major) { case 10: case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif m_bitsInfo = SqImgSampRegBitsGfx9; break; default: @@ -212,6 +215,34 @@ static constexpr BitsInfo SqImgRsrcRegBitsGfx11[static_cast(SqRsrcRegs {6, 0, 7}, // MinLodHi }; +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// SqImgSampReg Bits information look up table (Gfx12) +// TODO: update comment when the registers file is available +static constexpr BitsInfo SqImgRsrcRegBitsGfx12[static_cast(SqRsrcRegs::Count)] = { + {0, 0, 32}, // BaseAddress + {1, 0, 8}, // BaseAddressHi + {1, 17, 8}, // Format + {}, // Width + {2, 14, 16}, // Height + {3, 0, 12}, // DstSelXYZW + {3, 20, 5}, // SwizzleMode + {3, 28, 4}, // Type + {4, 0, 14}, // Depth + {}, // Pitch + {3, 25, 3}, // BcSwizzle + {1, 25, 5}, // BaseLevel + {3, 15, 5}, // LastLevel + {4, 16, 13}, // BaseArray + {1, 30, 2}, // WidthLo + {2, 0, 14}, // WidthHi + {5, 4, 1}, // ArrayPitch (aka UAV3D) + {}, // MinLod + {5, 26, 6}, // MinLodLo + {6, 0, 7}, // MinLodHi +}; +#endif + // ===================================================================================================================== // Helper class for handling Registers defined in SQ_IMG_RSRC_WORD // @@ -229,6 +260,11 @@ SqImgRsrcRegHandler::SqImgRsrcRegHandler(IRBuilder<> *builder, Value *reg, GfxIp case 11: m_bitsInfo = SqImgRsrcRegBitsGfx11; break; +#if LLPC_BUILD_GFX12 + case 12: + m_bitsInfo = SqImgRsrcRegBitsGfx12; + break; +#endif default: llvm_unreachable("GFX IP is not supported!"); break; @@ -258,6 +294,9 @@ Value *SqImgRsrcRegHandler::getReg(SqRsrcRegs regId) { case 10: return 
getRegCommon(static_cast(regId)); case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif return getRegCombine(static_cast(SqRsrcRegs::MinLodLo), static_cast(SqRsrcRegs::MinLodHi)); default: llvm_unreachable("GFX IP is not supported!"); @@ -272,6 +311,9 @@ Value *SqImgRsrcRegHandler::getReg(SqRsrcRegs regId) { switch (m_gfxIpVersion->major) { case 10: case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif return m_builder->CreateAdd( getRegCombine(static_cast(SqRsrcRegs::WidthLo), static_cast(SqRsrcRegs::WidthHi)), m_one); return m_builder->CreateAdd(getRegCommon(static_cast(SqRsrcRegs::Width)), m_one); @@ -311,6 +353,9 @@ void SqImgRsrcRegHandler::setReg(SqRsrcRegs regId, Value *regValue) { setRegCommon(static_cast(regId), regValue); break; case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif setRegCombine(static_cast(SqRsrcRegs::MinLodLo), static_cast(SqRsrcRegs::MinLodHi), regValue); break; default: @@ -327,6 +372,9 @@ void SqImgRsrcRegHandler::setReg(SqRsrcRegs regId, Value *regValue) { switch (m_gfxIpVersion->major) { case 10: case 11: +#if LLPC_BUILD_GFX12 + case 12: +#endif setRegCombine(static_cast(SqRsrcRegs::WidthLo), static_cast(SqRsrcRegs::WidthHi), m_builder->CreateSub(regValue, m_one)); break; diff --git a/lgc/util/RegStackUsage.cpp b/lgc/util/RegStackUsage.cpp index 90c7d71c5c..eca2322563 100644 --- a/lgc/util/RegStackUsage.cpp +++ b/lgc/util/RegStackUsage.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -68,6 +68,9 @@ static const struct { MsgPackScanner::Item funcLdsSize = {MsgPackScanner::ItemType::Scalar, ".lds_size"}; MsgPackScanner::Item funcSgprCount = {MsgPackScanner::ItemType::Scalar, ".sgpr_count"}; MsgPackScanner::Item funcVgprCount = {MsgPackScanner::ItemType::Scalar, ".vgpr_count"}; +#if LLPC_BUILD_GFX12 + MsgPackScanner::Item funcOutgoingVgprCount = {MsgPackScanner::ItemType::Scalar, ".outgoing_vgpr_count"}; +#endif MsgPackScanner::Item endTheFunc = {MsgPackScanner::ItemType::EndContainer}; MsgPackScanner::Item endShaderFunctions = {MsgPackScanner::ItemType::EndContainer}; MsgPackScanner::Item shaders = {MsgPackScanner::ItemType::Map, ".shaders"}; @@ -129,6 +132,9 @@ struct Usage { unsigned ldsSize; unsigned sgprCount; unsigned vgprCount; +#if LLPC_BUILD_GFX12 + unsigned outgoingVgprCount; +#endif bool cpsGlobal; bool scratchEn; bool memOrdered; @@ -146,6 +152,9 @@ struct Usage { << " ldsSize " << usage.ldsSize << "\n" << " sgprCount " << usage.sgprCount << "\n" << " vgprCount " << usage.vgprCount << "\n" +#if LLPC_BUILD_GFX12 + << " outgoingVgprCount " << usage.outgoingVgprCount << "\n" +#endif << " cpsGlobal " << usage.cpsGlobal << "\n" << " scratchEn " << usage.scratchEn << "\n" << " memOrdered " << usage.memOrdered << "\n"; @@ -232,6 +241,12 @@ void RegStackUsage::finalizeAndUpdate(SmallVectorImpl &elfBuffer, size_t s m_impl->finalizeAndUpdate(elfBuffer, startOffset, frontendGlobalAlignment); } +#if LLPC_BUILD_GFX12 +unsigned RegStackUsage::getMaxOutgoingVgprCount() const { + return m_impl->getUsage().outgoingVgprCount; +} +#endif + // ===================================================================================================================== // Construct from ELF blob. This reads the reg/stack usage from the ELF's PAL metadata. 
// This is passed rayGenUsage to allow for a future enhancement where frontend stack size is calculated in a @@ -305,6 +320,10 @@ void RegStackUsageImpl::scanPalMetadata() { m_usage.sgprCount = std::max(m_usage.sgprCount, unsigned(msgPackScanner.asInt(item).value_or(0))); else if (&item == &items.csVgprCount || &item == &items.funcVgprCount) m_usage.vgprCount = std::max(m_usage.vgprCount, unsigned(msgPackScanner.asInt(item).value_or(0))); +#if LLPC_BUILD_GFX12 + else if (&item == &items.funcOutgoingVgprCount) + m_usage.outgoingVgprCount = std::max(m_usage.outgoingVgprCount, unsigned(msgPackScanner.asInt(item).value_or(0))); +#endif else if (&item == &items.csMemOrdered) m_usage.memOrdered = msgPackScanner.asBool(item).value_or(false); // scratchEn and scratchMemorySize are read solely for the "Re-scan the new blob" check (in updateAndWrite) @@ -364,6 +383,9 @@ void RegStackUsageImpl::merge(const RegStackUsageImpl &shaderUsage) { m_usage.ldsSize = std::max(m_usage.ldsSize, shaderUsage.m_usage.ldsSize); m_usage.sgprCount = std::max(m_usage.sgprCount, shaderUsage.m_usage.sgprCount); m_usage.vgprCount = std::max(m_usage.vgprCount, shaderUsage.m_usage.vgprCount); +#if LLPC_BUILD_GFX12 + m_usage.outgoingVgprCount = std::max(m_usage.outgoingVgprCount, shaderUsage.m_usage.outgoingVgprCount); +#endif m_usage.memOrdered = std::max(m_usage.memOrdered, shaderUsage.m_usage.memOrdered); m_usage.callableShaderCount += shaderUsage.m_usage.callableShaderCount; diff --git a/lgc/util/WorkgroupLayout.cpp b/lgc/util/WorkgroupLayout.cpp index c71211c786..2361ea493b 100644 --- a/lgc/util/WorkgroupLayout.cpp +++ b/lgc/util/WorkgroupLayout.cpp @@ -87,7 +87,11 @@ SwizzleWorkgroupLayout lgc::calculateWorkgroupLayout(PipelineState *pipelineStat resultLayout.microLayout = WorkgroupLayout::Linear; } - if (pipelineState->getOptions().forceCsThreadIdSwizzling) { + if (pipelineState->getOptions().forceCsThreadIdSwizzling +#if LLPC_BUILD_GFX12 + || (pipelineState->getOptions().xInterleave == 3 && 
pipelineState->getOptions().yInterleave == 3) +#endif + ) { if ((workgroupSizeX >= 16) && (workgroupSizeX % 8 == 0) && (workgroupSizeY % 4 == 0)) { resultLayout.macroLayout = WorkgroupLayout::SexagintiQuads; } diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index 427789accd..997ca0945b 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -3375,6 +3375,13 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, return result; } +#if LLPC_BUILD_GFX12 + if (rtContext.isDynamicVgprEnabled()) { + // Set up max outgoing VGPR count metadata for kernel entry + lgc::cps::setMaxOutgoingVgprCount(*getEntryPoint(entry.get()), + rtContext.getRayTracingLibrarySummary().maxOutgoingVgprCount); + } +#endif // Build entry module at very last. #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 75 const bool needEntry = true; @@ -3446,6 +3453,67 @@ void Compiler::adjustRayTracingElf(ElfPackage *pipelineElf, RayTracingContext *r shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[1] = pipelineHash[1]; } +#if LLPC_BUILD_GFX12 + if (rtContext->isDynamicVgprEnabled()) { + // 3. Resolve DVGPR requirement and relocations + ElfReader reader(m_gfxIp); + size_t readSize = 0; + result = reader.ReadFromBuffer(pipelineElf->data(), &readSize); + assert(result == Result::Success); + + constexpr unsigned dvgprBitsInShaderId = 0x38; + + auto entryFuncVgprCount = + shaderFunctionSection.begin()->second.getMap(true)[PalAbi::HardwareStageMetadataKey::VgprCount].getUInt(); + // The required number of VGPR blocks minus 1 is stored at 3..5 bit. 
+ assert((shaderProp.shaderIdExtraBits & dvgprBitsInShaderId) == 0); + assert(entryFuncVgprCount <= 128); + shaderProp.shaderIdExtraBits |= (llvm::divideCeil(entryFuncVgprCount, 16) - 1) << 3; + + for (unsigned i = 0; i < reader.getRelocationCount(); ++i) { + ElfReloc reloc = {}; + reader.getRelocation(i, &reloc); + ElfSymbol elfSym = {}; + reader.getSymbol(reloc.symIdx, &elfSym); + StringRef relocName = elfSym.pSymName; + + constexpr const char dvgprRelocPrefix[] = "_dvgpr$"; + if (relocName.starts_with(dvgprRelocPrefix)) { + StringRef targetFuncName = relocName.substr(strlen(dvgprRelocPrefix)); + assert(reader.isValidSymbol(targetFuncName.data()) && "Target function for dVGPR does not exist"); + auto &funcMeta = shaderFunctionSection[targetFuncName].getMap(true); + auto &vgprCount = funcMeta[PalAbi::HardwareStageMetadataKey::VgprCount].getUInt(); + + // The required number of VGPR blocks minus 1 is stored at 3..5 bit. + unsigned relocValue = (llvm::divideCeil(vgprCount, 16) - 1) << 3; + // Change the relocation from `_dvgpr$` to ``, so that we can get the function address. + unsigned targetSymbolIndex = reader.getSymbolIndexByName(targetFuncName.data()); + writer.fixupRelocation(i, relocValue, targetSymbolIndex, dvgprBitsInShaderId); + } + } + + // 4. 
Collect maximum outgoing VGPR count and update library summary + unsigned maxOutGoingVgprCount = 0; + for (auto &funcSection : shaderFunctionSection) { + auto &funcMeta = funcSection.second.getMap(true); + // FIXME: g_palPipelineAbiMetadata.h isn't getting PAL_BUILD_GFX12 enabled here + auto outgoingVgprCountMeta = funcMeta.find(".outgoing_vgpr_count"); + if (outgoingVgprCountMeta != funcMeta.end()) { + unsigned outGoingVgprCount = outgoingVgprCountMeta->second.getUInt(); + maxOutGoingVgprCount = std::max(maxOutGoingVgprCount, outGoingVgprCount); + } + } + assert(maxOutGoingVgprCount > 0); + { + // Library summary in rtContext could be shared between threads, need to ensure it is only modified by one thread + // at a time. + std::lock_guard lock(getHelperThreadMutex()); + auto &summary = rtContext->getRayTracingLibrarySummary(); + summary.maxOutgoingVgprCount = std::max(summary.maxOutgoingVgprCount, maxOutGoingVgprCount); + } + } +#endif + // Write modified metadata to the pipeline ELF ElfNote newMetaNote = metaNote; std::string destBlob; diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index 991848d9eb..4f661a8f66 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -265,6 +265,12 @@ Options PipelineContext::computePipelineOptions() const { options.forceCsThreadIdSwizzling = getPipelineOptions()->forceCsThreadIdSwizzling; options.includeIr = (IncludeLlvmIr || getPipelineOptions()->includeIr); +#if LLPC_BUILD_GFX12 + options.cacheScopePolicyControl = getPipelineOptions()->cacheScopePolicyControl; + options.temporalHintControl = getPipelineOptions()->temporalHintControl; + options.disableDynamicVgpr = true; +#endif + options.threadGroupSwizzleMode = static_cast(getPipelineOptions()->threadGroupSwizzleMode); @@ -308,6 +314,9 @@ Options PipelineContext::computePipelineOptions() const { options.enableInterpModePatch = getPipelineOptions()->enableInterpModePatch; 
options.pageMigrationEnabled = getPipelineOptions()->pageMigrationEnabled; options.resourceLayoutScheme = static_cast(getPipelineOptions()->resourceLayoutScheme); +#if LLPC_BUILD_GFX12 + options.expertSchedulingMode = getPipelineOptions()->expertSchedulingMode; +#endif options.optimizePointSizeWrite = getPipelineOptions()->optimizePointSizeWrite; // Driver report full subgroup lanes for compute shader, here we just set fullSubgroups as default options @@ -710,6 +719,9 @@ ShaderOptions PipelineContext::computeShaderOptions(const PipelineShaderInfo &sh shaderOptions.waveSize = shaderInfo.options.waveSize; shaderOptions.wgpMode = shaderInfo.options.wgpMode; +#if LLPC_BUILD_GFX12 + shaderOptions.temporalHintShaderControl = shaderInfo.options.temporalHintShaderControl; +#endif // If subgroupSize is specified, we should use the specified value. if (shaderInfo.options.subgroupSize != 0) shaderOptions.subgroupSize = shaderInfo.options.subgroupSize; @@ -803,6 +815,9 @@ ShaderOptions PipelineContext::computeShaderOptions(const PipelineShaderInfo &sh "Mismatch"); shaderOptions.aggressiveInvariantLoads = static_cast(shaderInfo.options.aggressiveInvariantLoads); +#if LLPC_BUILD_GFX12 + shaderOptions.workgroupRoundRobin = shaderInfo.options.workgroupRoundRobin; +#endif shaderOptions.viewIndexFromDeviceIndex = shaderInfo.options.viewIndexFromDeviceIndex; shaderOptions.forceUnderflowPrevention = shaderInfo.options.forceUnderflowPrevention; diff --git a/llpc/context/llpcRayTracingContext.cpp b/llpc/context/llpcRayTracingContext.cpp index f0526bfefe..3ee3317e0b 100644 --- a/llpc/context/llpcRayTracingContext.cpp +++ b/llpc/context/llpcRayTracingContext.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -169,6 +169,12 @@ bool RayTracingContext::isContinuationsMode() const { // Continuations mode is only enabled for indirect mode. if (getIndirectStageMask() != 0) { if (getRaytracingMode() == Vkgc::LlpcRaytracingMode::Auto) { +#if LLPC_BUILD_GFX12 + if (getGfxIpVersion().major >= 12) { + // For GFX12+, enable continuations mode by default. + isContinuations = true; + } +#endif } else if (getRaytracingMode() == Vkgc::LlpcRaytracingMode::Continuations) { // Client require continuations mode explicitly. isContinuations = true; @@ -312,6 +318,11 @@ lgc::Options RayTracingContext::computePipelineOptions() const { options.cpsFlags = m_pipelineInfo->cpsFlags; +#if LLPC_BUILD_GFX12 + options.disableDynamicVgpr = m_pipelineInfo->disableDynamicVgpr; + options.dynamicVgprBlockSize = m_pipelineInfo->dynamicVgprBlockSize; +#endif + #endif return options; diff --git a/llpc/context/llpcRayTracingContext.h b/llpc/context/llpcRayTracingContext.h index c36f8faa54..aa6e47990a 100644 --- a/llpc/context/llpcRayTracingContext.h +++ b/llpc/context/llpcRayTracingContext.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -120,6 +120,13 @@ class RayTracingContext : public PipelineContext { Vkgc::LlpcRaytracingMode getRaytracingMode() const { return m_pipelineInfo->mode; } bool isContinuationsMode() const; unsigned getCpsFlag() { return m_pipelineInfo->cpsFlags; } +#if LLPC_BUILD_GFX12 + bool isDynamicVgprEnabled() { + return (m_gfxIp.major >= 12) && + (isContinuationsMode() || getRaytracingMode() == Vkgc::LlpcRaytracingMode::Continufy) && + !m_pipelineInfo->disableDynamicVgpr; + } +#endif void updateRayFlagsKnownBits(const llvm::KnownBits &knownBits) { if (m_rayFlagsKnownBits.has_value()) { m_rayFlagsKnownBits = m_rayFlagsKnownBits->intersectWith(knownBits); diff --git a/llpc/lowering/LowerGlobals.cpp b/llpc/lowering/LowerGlobals.cpp index bf2894f2d7..54ba4cb0d7 100644 --- a/llpc/lowering/LowerGlobals.cpp +++ b/llpc/lowering/LowerGlobals.cpp @@ -1536,6 +1536,9 @@ void LowerGlobals::lowerBufferBlock() { const unsigned descSet = mdconst::extract(resMetaNode->getOperand(0))->getZExtValue(); const unsigned binding = mdconst::extract(resMetaNode->getOperand(1))->getZExtValue(); +#if LLPC_BUILD_GFX12 + bool isNoAllocResource = global.hasMetadata(gSPIRVMD::ResourceNoAlloc); +#endif // AtomicCounter is emulated following same impl of SSBO, only qualifier 'offset' will be used in its // MD now. Using a new MD kind to detect it. AtomicCounter's type should be uint, not a structure. @@ -1633,6 +1636,10 @@ void LowerGlobals::lowerBufferBlock() { // pointers are removing zero-index GEPs and BitCast with pointer to pointer cast. m_builder->SetInsertPoint(replaceInstsInfo.otherInst); unsigned bufferFlags = global.isConstant() ? 
0 : lgc::Builder::BufferFlagWritten; +#if LLPC_BUILD_GFX12 + if (isNoAllocResource) + bufferFlags |= lgc::Builder::BufferFlagLLcNoAlloc; +#endif auto descTy = lgc::ResourceNodeType::DescriptorBuffer; Value *const bufferDesc = @@ -1720,6 +1727,10 @@ void LowerGlobals::lowerBufferBlock() { bufferFlags |= lgc::Builder::BufferFlagNonUniform; if (!global.isConstant()) bufferFlags |= lgc::Builder::BufferFlagWritten; +#if LLPC_BUILD_GFX12 + if (isNoAllocResource) + bufferFlags |= lgc::Builder::BufferFlagLLcNoAlloc; +#endif Value *bufferDescs[2] = {nullptr}; unsigned descSets[2] = {descSet, 0}; @@ -1801,6 +1812,11 @@ void LowerGlobals::lowerBufferBlock() { m_builder->SetInsertPointPastAllocas(func); unsigned bufferFlags = global.isConstant() ? 0 : lgc::Builder::BufferFlagWritten; +#if LLPC_BUILD_GFX12 + if (isNoAllocResource) + bufferFlags |= lgc::Builder::BufferFlagLLcNoAlloc; +#endif + auto descTy = lgc::ResourceNodeType::DescriptorBuffer; Value *const bufferDesc = isAccelerationStructure diff --git a/llpc/lowering/ProcessGpuRtLibrary.cpp b/llpc/lowering/ProcessGpuRtLibrary.cpp index 7d85d75434..51cab23ae0 100644 --- a/llpc/lowering/ProcessGpuRtLibrary.cpp +++ b/llpc/lowering/ProcessGpuRtLibrary.cpp @@ -183,6 +183,14 @@ ProcessGpuRtLibrary::LibraryFunctionTable::LibraryFunctionTable() { #endif m_libFuncPtrs["AmdExtD3DShaderIntrinsics_ShaderMarker"] = &ProcessGpuRtLibrary::createShaderMarker; m_libFuncPtrs["AmdExtD3DShaderIntrinsics_WaveScan"] = &ProcessGpuRtLibrary::createWaveScan; +#if LLPC_BUILD_GFX12 + m_libFuncPtrs["AmdTraceRayDualIntersectRay"] = &ProcessGpuRtLibrary::createDualIntersectRay; + m_libFuncPtrs["AmdTraceRayIntersectRayBvh8"] = &ProcessGpuRtLibrary::createIntersectRayBvh8; + m_libFuncPtrs["AmdTraceRayDsStackPush8Pop1"] = &ProcessGpuRtLibrary::createDsStackPush8Pop1; + m_libFuncPtrs["AmdTraceRayDsStackPush8Pop2"] = &ProcessGpuRtLibrary::createDsStackPush8Pop2; + m_libFuncPtrs["AmdTraceRayDsStackPush8Pop1PrimRangeEnabled"] = + 
&ProcessGpuRtLibrary::createDsStackPush8Pop1PrimRangeEnabled; +#endif m_libFuncPtrs["AmdExtD3DShaderIntrinsics_FloatOpWithRoundMode"] = &ProcessGpuRtLibrary::createFloatOpWithRoundMode; m_libFuncPtrs["AmdExtDispatchThreadIdFlat"] = &ProcessGpuRtLibrary::createDispatchThreadIdFlat; m_libFuncPtrs["AmdTraceRaySampleGpuTimer"] = &ProcessGpuRtLibrary::createSampleGpuTimer; @@ -1001,4 +1009,234 @@ void ProcessGpuRtLibrary::createWaveScan(llvm::Function *func) { m_builder->CreateRet(m_builder->create(waveOp, flags, src0)); } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +void ProcessGpuRtLibrary::createDualIntersectRay(Function *func) { + createIntersectRay(func, true); +} + +void ProcessGpuRtLibrary::createIntersectRayBvh8(Function *func) { + createIntersectRay(func, false); +} + +// ===================================================================================================================== +// Create function to return the ray intersection result of a dual-node or BVH8 intersection +// +// @param func : The function to create (isDualNode selects the dual-node intrinsic, otherwise the BVH8 one) +void ProcessGpuRtLibrary::createIntersectRay(Function *func, bool isDualNode) { + auto rtip = m_gpurtKey.rtipVersion; + if (m_gpurtKey.bvhResDesc.size() < 4 || (rtip < Vkgc::RtIpVersion({3, 0}) && rtip != Vkgc::RtIpVersion({1, 5}))) { + // Don't generate code for non fitting RTIP. + m_builder->CreateRet(PoisonValue::get(func->getReturnType())); + return; + } + auto argIt = func->arg_begin(); + // 1.
+ // struct DualIntersectResult + // { + // uint4 first; + // uint4 second; + // uint2 geometryId; + // }; + // DualIntersectResult AmdTraceRayDualIntersectRay( + // in uint2 baseNodePtr, + // inout float3 rayOrigin, + // inout float3 rayDir, + // in float rayExtent, + // in uint instanceMask, + // in uint boxSortHeuristic, + // in uint node0, + // in uint node1) + // { + // bvhSrd = generateBvhSrd() + // offsets.x = node0 + // offsets.y = node1 + // call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh.dual.intersect.ray(i64 %node_ptr, float + // %ray_extent, i8 %instance_mask, <3 x float> %ray_origin, <3 x float> %ray_dir, <2 x i32> %offsets, + // <4 x i32> %tdescr) + // } + + // 2. + // struct Bvh8IntersectResult + // { + // uint4 slot0; + // uint4 slot1; + // uint2 ext; + // } + + // Bvh8IntersectResult AmdTraceRayIntersectRayBvh8( + // in uint2 baseNodePtr, + // inout float3 rayOrigin, + // inout float3 rayDir, + // in float rayExtent, + // in uint instanceMask, + // in uint boxSortHeuristic, + // in uint node) + // { + // bvhSrd = generateBvhSrd() + // offsets = node + // call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float + // %ray_extent, i8 %instance_mask, <3 x float> %ray_origin, <3 x float> %ray_dir, %offsets, + // <4 x i32> %tdescr) + // } + + // uint2 baseNodePtr + Value *baseNodePtr = m_builder->CreateLoad(FixedVectorType::get(m_builder->getInt32Ty(), 2), argIt); + baseNodePtr = m_builder->CreateBitCast(baseNodePtr, m_builder->getInt64Ty()); + argIt++; + + // float3 rayOrigin + Value *rayOrigin = m_builder->CreateLoad(FixedVectorType::get(m_builder->getFloatTy(), 3), argIt); + argIt++; + + // float3 rayDir + Value *rayDir = m_builder->CreateLoad(FixedVectorType::get(m_builder->getFloatTy(), 3), argIt); + argIt++; + + // float rayExtent + Value *rayExtent = m_builder->CreateLoad(m_builder->getFloatTy(), argIt); + argIt++; + + // uint instanceMask + Value *instanceMask = 
m_builder->CreateLoad(m_builder->getInt32Ty(), argIt); + instanceMask = m_builder->CreateTrunc(instanceMask, m_builder->getInt8Ty()); + argIt++; + + // uint boxSortHeuristic + Value *boxSortHeuristic = m_builder->CreateLoad(m_builder->getInt32Ty(), argIt); + argIt++; + + // uint node0 + Value *node0 = m_builder->CreateLoad(m_builder->getInt32Ty(), argIt); + Value *dualNodes = PoisonValue::get(FixedVectorType::get(Type::getInt32Ty(*m_context), 2)); + if (isDualNode) { + argIt++; + // uint node1 + Value *node1 = m_builder->CreateLoad(m_builder->getInt32Ty(), argIt); + // Pack the two nodes together + dualNodes = m_builder->CreateInsertElement(dualNodes, node0, uint64_t(0)); + dualNodes = m_builder->CreateInsertElement(dualNodes, node1, 1); + } + + Value *imageDesc = createGetBvhSrd(nullptr, boxSortHeuristic); + + auto intx10Ty = llvm::FixedVectorType::get(m_builder->getInt32Ty(), 10); + auto floatx3Ty = llvm::FixedVectorType::get(m_builder->getFloatTy(), 3); + Type *returnTy = llvm::StructType::get(m_builder->getContext(), {intx10Ty, floatx3Ty, floatx3Ty}); + std::string callName = + (isDualNode == 1) ? "llvm.amdgcn.image.bvh.dual.intersect.ray" : "llvm.amdgcn.image.bvh8.intersect.ray"; + + Value *result = m_builder->CreateNamedCall( + callName, returnTy, + {baseNodePtr, rayExtent, instanceMask, rayOrigin, rayDir, isDualNode ? dualNodes : node0, imageDesc}, {}); + + // @llvm.amdgcn.image.bvh.dual.intersect.ray and @llvm.amdgcn.image.bvh8.intersect.ray intrinsics + // return {<10 x i32>, <3 x float>, <3 x float>}, which are: + // DualIntersectResult/Bvh8IntersectResult, ray_origin, ray_dir.
+ Value *dualIntersectOrBvh8Result = m_builder->CreateExtractValue(result, 0); + Value *resultFirst = m_builder->CreateShuffleVector(dualIntersectOrBvh8Result, ArrayRef{0, 1, 2, 3}); + Value *resultSecond = m_builder->CreateShuffleVector(dualIntersectOrBvh8Result, ArrayRef{4, 5, 6, 7}); + Value *resultGeometryId = m_builder->CreateShuffleVector(dualIntersectOrBvh8Result, ArrayRef{8, 9}); + + Value *resultRayOrigin = m_builder->CreateExtractValue(result, 1); + Value *resultRayDir = m_builder->CreateExtractValue(result, 2); + + assert(func->getReturnType()->isStructTy() && (func->getReturnType()->getStructNumElements() == 3)); + Value *ret = PoisonValue::get(func->getReturnType()); + ret = m_builder->CreateInsertValue(ret, resultFirst, 0); + ret = m_builder->CreateInsertValue(ret, resultSecond, 1); + ret = m_builder->CreateInsertValue(ret, resultGeometryId, 2); + + // Store rayOrigin and rayDir back. + m_builder->CreateStore(resultRayOrigin, func->getArg(1)); + m_builder->CreateStore(resultRayDir, func->getArg(2)); + + m_builder->CreateRet(ret); +} + +// ===================================================================================================================== +// Push 8 nodes to the LDS stack and pop N nodes +// +// @param func : The function to create +// @param returnNodeCount : Number of nodes to pop and return (must be 1 or 2) +// @param primRangeEnable : Whether to enable primitive range +void ProcessGpuRtLibrary::createDsStackPush8PopN(Function *func, unsigned returnNodeCount, bool primRangeEnable) { + assert((returnNodeCount == 1) || (returnNodeCount == 2)); + assert(m_context->getGfxIpVersion().major >= 12); + + auto int32x4Ty = FixedVectorType::get(m_builder->getInt32Ty(), 4); + const static unsigned MaxLdsStackEntries = 16; + + auto argIt = func->arg_begin(); + Value *stackAddr = argIt++; + Value *stackAddrVal = m_builder->CreateLoad(m_builder->getInt32Ty(), stackAddr); + Value *lastNodePtr = m_builder->CreateLoad(m_builder->getInt32Ty(), argIt++); + Value *data0 =
m_builder->CreateLoad(int32x4Ty, argIt++); + Value *data1 = m_builder->CreateLoad(int32x4Ty, argIt); + + Value *data = m_builder->CreateShuffleVector(data0, data1, ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); + + // OFFSET = {OFFSET1, OFFSET0} + // stack_size[4:0] = OFFSET0[4:0] + assert(MaxLdsStackEntries == 16); + unsigned offsetVal = MaxLdsStackEntries; + if (primRangeEnable) { + assert(returnNodeCount == 1); + // NOTE: For the push8-pop1 variant, bit 1 of OFFSET1 indicates if primitive range is enabled. We set the bit + // here by request. + offsetVal |= 1 << 9; + } + + Value *offset = m_builder->getInt32(offsetVal); + + Intrinsic::AMDGCNIntrinsics intrinsic = (returnNodeCount == 1) ? Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn + : Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn; + Value *result = m_builder->CreateIntrinsic(intrinsic, {}, {stackAddrVal, lastNodePtr, data, offset}); + + m_builder->CreateStore(m_builder->CreateExtractValue(result, 1), stackAddr); + + Value *ret = m_builder->CreateExtractValue(result, 0); + + if (returnNodeCount == 1) { + m_builder->CreateRet(ret); + } else { + // llvm.amdgcn.ds.bvh.stack.push8.pop2.rtn returns i64, cast it to uvec2. 
+ m_builder->CreateRet(m_builder->CreateBitCast(ret, FixedVectorType::get(m_builder->getInt32Ty(), 2))); + } +} + +// ===================================================================================================================== +// Create function to do LDS stack push 8 pop 1 +// +// @param func : The function to create +void ProcessGpuRtLibrary::createDsStackPush8Pop1(Function *func) { + if (m_gpurtKey.rtipVersion >= Vkgc::RtIpVersion({3, 0})) + createDsStackPush8PopN(func, 1, false); + else + m_builder->CreateRet(PoisonValue::get(func->getReturnType())); +} + +// ===================================================================================================================== +// Create function to do LDS stack push 8 pop 2 +// +// @param func : The function to create +void ProcessGpuRtLibrary::createDsStackPush8Pop2(Function *func) { + if (m_gpurtKey.rtipVersion >= Vkgc::RtIpVersion({3, 0}) || m_gpurtKey.rtipVersion == Vkgc::RtIpVersion({1, 5})) + createDsStackPush8PopN(func, 2, false); + else + m_builder->CreateRet(PoisonValue::get(func->getReturnType())); +} + +// ===================================================================================================================== +// Create function to do LDS stack push 8 pop 1 with primitive range enabled +// +// @param func : The function to create +void ProcessGpuRtLibrary::createDsStackPush8Pop1PrimRangeEnabled(Function *func) { + if (m_gpurtKey.rtipVersion >= Vkgc::RtIpVersion({3, 0})) + createDsStackPush8PopN(func, 1, true); + else + m_builder->CreateRet(PoisonValue::get(func->getReturnType())); +} +#endif + } // namespace Llpc diff --git a/llpc/lowering/ProcessGpuRtLibrary.h b/llpc/lowering/ProcessGpuRtLibrary.h index d6c6765fa7..b6d7ef4f31 100644 --- a/llpc/lowering/ProcessGpuRtLibrary.h +++ b/llpc/lowering/ProcessGpuRtLibrary.h @@ -146,6 +146,15 @@ class ProcessGpuRtLibrary : public SpirvLower, public llvm::PassInfoMixin' should exist in final ELF relocation section. 
+; Repeat 4 times since this pipeline will output 4 ELFs. +; CHECK-LABEL: .rel.text {{.*}} +; CHECK-NOT: _dvgpr$ +; CHECK-LABEL: .strtab {{.*}} + +; CHECK-LABEL: .rel.text {{.*}} +; CHECK-NOT: _dvgpr$ +; CHECK-LABEL: .strtab {{.*}} + +; CHECK-LABEL: .rel.text {{.*}} +; CHECK-NOT: _dvgpr$ +; CHECK-LABEL: .strtab {{.*}} + +; CHECK-LABEL: .rel.text {{.*}} +; CHECK-NOT: _dvgpr$ +; CHECK-LABEL: .strtab {{.*}} + +[Version] +version = 69 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(location = 14) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 14); + + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(g_ray.color, 0)); +} + +[rgenInfo] +entryPoint = main + +[chitGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(shaderRecordEXT, std430) buffer sbt { + float z; +}; + +hitAttributeEXT vec2 g_hit; +rayPayloadInEXT RayPayload g_ray; + +void main() { + g_ray.color.xy = g_hit; + g_ray.color.z = z; +} + +[chitInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 0xffffffff +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[1].type = DescriptorImage 
+userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 8 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 +userDataNode[1].visibility = 0xffffffff +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = DescriptorConstBufferCompact +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 2 +userDataNode[1].next[0].set = 0x0000005D +userDataNode[1].next[0].binding = 17 +userDataNode[1].next[1].type = DescriptorConstBuffer +userDataNode[1].next[1].offsetInDwords = 2 +userDataNode[1].next[1].sizeInDwords = 4 +userDataNode[1].next[1].set = 0x0000005D +userDataNode[1].next[1].binding = 0 +userDataNode[1].next[2].type = DescriptorBuffer +userDataNode[1].next[2].offsetInDwords = 6 +userDataNode[1].next[2].sizeInDwords = 4 +userDataNode[1].next[2].set = 0x0000005D +userDataNode[1].next[2].binding = 1 + +[RayTracingPipelineState] +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[1].closestHitShader = 1 +maxRecursionDepth = 1 +indirectStageMask = 0xffffffff +mode = 2 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2164261887 +rtState.nodeStrideShift = 7 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 
+rtState.enableOptimalLdsStackSizeForUnified = 1 +payloadSizeMaxInLib = 12 +attributeSizeMaxInLib = 8 +hasPipelineLibrary = 1 diff --git a/llpc/test/shaderdb/gfx12/Float16Dot2WithRTE.spvasm b/llpc/test/shaderdb/gfx12/Float16Dot2WithRTE.spvasm new file mode 100644 index 0000000000..1530f31c36 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/Float16Dot2WithRTE.spvasm @@ -0,0 +1,109 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; This test is to verify that dot(f16vec2, f16vec2) is translated to the HW instruction v_dot2_f16_f16 +; correctly in RTE mode on GFX12. 
+ +; RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +; SHADERTEST: call reassoc nnan ninf nsz arcp contract half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %{{[0-9]*}}, <2 x half> %{{[0-9]*}}, half 0xH0000) + +; SHADERTEST-LABEL: {{^// LLPC}} final ELF info +; SHADERTEST: v_dot2_f16_f16 v{{[0-9]*}}, s{{[0-9]*}}, s{{[0-9]*}}, 0 +; SHADERTEST: .float_mode: 0x00000000000000C0 + + OpCapability Shader + OpCapability RoundingModeRTE + OpCapability FloatControls2 + OpCapability UniformAndStorageBuffer16BitAccess + OpCapability Float16 + OpExtension "SPV_KHR_float_controls" + OpExtension "SPV_KHR_float_controls2" + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %gl_GlobalInvocationID + OpExecutionMode %2 LocalSize 1 1 1 + OpExecutionMode %2 RoundingModeRTE 16 + OpExecutionModeId %2 FPFastMathDefault %half %uint_458767 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 BufferBlock + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %5 NonWritable + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 1 + OpDecorate %_arr_half_int_1 ArrayStride 2 + OpDecorate %_arr_half_int_2 ArrayStride 2 + %void = OpTypeVoid + %11 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %v2uint = OpTypeVector %uint 2 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %uint_1 = OpConstant %uint 1 + %half = OpTypeFloat 16 +%_ptr_Uniform_half = OpTypePointer Uniform %half +%_ptr_Function_half = OpTypePointer Function %half + %v2half = 
OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Input_v4half = OpTypePointer Input %v4half +%_ptr_Output_v4half = OpTypePointer Output %v4half + %mat2v2half = OpTypeMatrix %v2half 2 +%_arr_half_int_1 = OpTypeArray %half %int_1 +%_arr_half_int_2 = OpTypeArray %half %int_2 + %_struct_4 = OpTypeStruct %_arr_half_int_2 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %5 = OpVariable %_ptr_Uniform__struct_4 Uniform + %_struct_6 = OpTypeStruct %_arr_half_int_1 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%uint_458767 = OpConstant %uint 458767 + %2 = OpFunction %void None %11 + %34 = OpLabel + %35 = OpAccessChain %_ptr_Uniform_half %5 %int_0 %int_0 + %36 = OpLoad %half %35 + %37 = OpAccessChain %_ptr_Uniform_half %5 %int_0 %int_1 + %38 = OpLoad %half %37 + %39 = OpCompositeConstruct %v2half %36 %36 + %40 = OpCompositeConstruct %v2half %38 %38 + %41 = OpDot %half %39 %40 + %42 = OpAccessChain %_ptr_Uniform_half %7 %int_0 %int_0 + OpStore %42 %41 + OpReturn + OpFunctionEnd diff --git a/llpc/test/shaderdb/gfx12/Float16Dot2WithRTZ.spvasm b/llpc/test/shaderdb/gfx12/Float16Dot2WithRTZ.spvasm new file mode 100644 index 0000000000..72956d11b3 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/Float16Dot2WithRTZ.spvasm @@ -0,0 +1,109 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; This test is to verify that dot(f16vec2, f16vec2) is not translated to the HW instruction v_dot2_f16_f16 +; in RTZ mode on GFX12. v_dot2_f16_f16 only respects RTE mode so the SPIR-V dot operation is still emulated +; by mul+add. 
+ +; RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +; SHADERTEST: [[TMP:%.*]] = fmul reassoc arcp contract afn half %{{[0-9]*}}, %{{[0-9]*}} +; SHADERTEST-NEXT: fadd reassoc arcp contract afn half [[TMP]], [[TMP]] + +; SHADERTEST-LABEL: {{^// LLPC}} final ELF info +; SHADERTEST: s_mul_f16 [[TMP:s.*]], s{{[0-9]*}}, s{{[0-9]*}} +; SHADERTEST-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; SHADERTEST-NEXT: s_add_f16 s{{[0-9]*}}, [[TMP]], [[TMP]] +; SHADERTEST: .float_mode: 0x00000000000000CC + + OpCapability Shader + OpCapability RoundingModeRTZ + OpCapability UniformAndStorageBuffer16BitAccess + OpCapability Float16 + OpExtension "SPV_KHR_float_controls" + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %gl_GlobalInvocationID + OpExecutionMode %2 LocalSize 1 1 1 + OpExecutionMode %2 RoundingModeRTZ 16 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 BufferBlock + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %5 NonWritable + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 1 + OpDecorate %_arr_half_int_1 ArrayStride 2 + OpDecorate %_arr_half_int_2 ArrayStride 2 + %void = OpTypeVoid + %11 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %v2uint = OpTypeVector %uint 2 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %uint_1 = OpConstant %uint 1 + %half = OpTypeFloat 16 +%_ptr_Uniform_half = OpTypePointer Uniform %half +%_ptr_Function_half = OpTypePointer 
Function %half + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Input_v4half = OpTypePointer Input %v4half +%_ptr_Output_v4half = OpTypePointer Output %v4half + %mat2v2half = OpTypeMatrix %v2half 2 +%_arr_half_int_1 = OpTypeArray %half %int_1 +%_arr_half_int_2 = OpTypeArray %half %int_2 + %_struct_4 = OpTypeStruct %_arr_half_int_2 +%_ptr_Uniform__struct_4 = OpTypePointer Uniform %_struct_4 + %5 = OpVariable %_ptr_Uniform__struct_4 Uniform + %_struct_6 = OpTypeStruct %_arr_half_int_1 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %2 = OpFunction %void None %11 + %34 = OpLabel + %35 = OpAccessChain %_ptr_Uniform_half %5 %int_0 %int_0 + %36 = OpLoad %half %35 + %37 = OpAccessChain %_ptr_Uniform_half %5 %int_0 %int_1 + %38 = OpLoad %half %37 + %39 = OpCompositeConstruct %v2half %36 %36 + %40 = OpCompositeConstruct %v2half %38 %38 + %41 = OpDot %half %39 %40 + %42 = OpAccessChain %_ptr_Uniform_half %7 %int_0 %int_0 + OpStore %42 %41 + OpReturn + OpFunctionEnd diff --git a/llpc/test/shaderdb/gfx12/Float16Dot2WithSignedZeros.spvasm b/llpc/test/shaderdb/gfx12/Float16Dot2WithSignedZeros.spvasm new file mode 100644 index 0000000000..3b23783c20 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/Float16Dot2WithSignedZeros.spvasm @@ -0,0 +1,129 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; This test is to verify that dot(f16vec2, f16vec2) is not translated to the HW instruction v_dot2_f16_f16 +; in signed zero mode on GFX12. The test checks this result: dot2(<-0.0, -0.0>, <1.0, 1.0>) ?= -0.0. +; v_dot2_f16_f16 is not IEEE compliant and doesn't respect signed zeros. 
+ +; RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +; SHADERTEST: [[MUL:%.*]] = fmul nnan ninf arcp contract <2 x half> %{{.*}}, %{{.*}} +; SHADERTEST-NEXT: [[ELEM0:%.*]] = extractelement <2 x half> [[MUL]], i64 0 +; SHADERTEST-NEXT: [[ELEM1:%.*]] = extractelement <2 x half> [[MUL]], i64 0 +; SHADERTEST-NEXT: fadd nnan ninf arcp contract half [[ELEM0]], [[ELEM1]] + +; SHADERTEST-LABEL: {{^// LLPC}} final ELF info +; SHADERTEST: s_mul_f16 [[TMP:s.*]], s{{[0-9]*}}, s{{[0-9]*}} +; SHADERTEST: s_add_f16 s{{[0-9]*}}, [[TMP]], [[TMP]] + +; SPIR-V +; Version: 1.2 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability FloatControls2 + OpCapability Float16 + OpExtension "SPV_KHR_float_controls2" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %gl_GlobalInvocationID + OpExecutionMode %2 LocalSize 1 1 1 + OpExecutionModeId %2 FPFastMathDefault %half %uint_458763 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 0 + OpDecorate %7 NonWritable + OpMemberDecorate %_struct_8 0 Offset 0 + OpDecorate %_struct_8 BufferBlock + OpDecorate %9 DescriptorSet 0 + OpDecorate %9 Binding 1 + OpDecorate %_arr_half_int_1 ArrayStride 2 + OpDecorate %_arr_half_int_2 ArrayStride 2 + OpDecorate %_arr_uint_int_1 ArrayStride 4 + %void = OpTypeVoid + %14 = OpTypeFunction %void + %bool = OpTypeBool + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %v2uint = OpTypeVector %uint 2 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %half = OpTypeFloat 16 +%_ptr_Uniform_half = OpTypePointer 
Uniform %half +%_ptr_Function_half = OpTypePointer Function %half + %v2half = OpTypeVector %half 2 + %v3half = OpTypeVector %half 3 + %v4half = OpTypeVector %half 4 +%_ptr_Input_v4half = OpTypePointer Input %v4half +%_ptr_Output_v4half = OpTypePointer Output %v4half + %mat2v2half = OpTypeMatrix %v2half 2 +%_arr_half_int_1 = OpTypeArray %half %int_1 +%_arr_half_int_2 = OpTypeArray %half %int_2 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_arr_uint_int_1 = OpTypeArray %uint %int_1 + %_struct_6 = OpTypeStruct %_arr_uint_int_1 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform + %_struct_8 = OpTypeStruct %_arr_uint_int_1 +%_ptr_Uniform__struct_8 = OpTypePointer Uniform %_struct_8 + %9 = OpVariable %_ptr_Uniform__struct_8 Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%half_n0x1p_0 = OpConstant %half -0x1p+0 +%half_n0x1_4ap_1 = OpConstant %half -0x1.4ap+1 +%half_0x0p_0 = OpConstant %half 0x0p+0 +%half_0x1pn1 = OpConstant %half 0x1p-1 +%half_0x1p_0 = OpConstant %half 0x1p+0 +%half_0x1p_1 = OpConstant %half 0x1p+1 +%half_0x1_8p_1 = OpConstant %half 0x1.8p+1 +%half_0x1p_2 = OpConstant %half 0x1p+2 +%half_0x1_4p_2 = OpConstant %half 0x1.4p+2 +%half_0x1_8p_2 = OpConstant %half 0x1.8p+2 +%half_0x1pn14 = OpConstant %half 0x1p-14 +%uint_458763 = OpConstant %uint 458763 + %2 = OpFunction %void None %14 + %47 = OpLabel + %48 = OpAccessChain %_ptr_Uniform_uint %7 %int_0 %int_0 + %49 = OpLoad %uint %48 + %50 = OpBitcast %v2half %49 + %51 = OpCompositeExtract %half %50 0 + %52 = OpCompositeExtract %half %50 1 + %53 = OpCompositeConstruct %v2half %51 %51 + %54 = OpCompositeConstruct %v2half %52 %52 + %55 = OpDot %half %53 %54 + %56 = OpCompositeConstruct %v2half %55 %half_0x0p_0 + %57 = OpBitcast %uint %56 + %58 = OpAccessChain %_ptr_Uniform_uint %9 %int_0 %int_0 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/llpc/test/shaderdb/gfx12/ImageAtomicFAdd.vert 
b/llpc/test/shaderdb/gfx12/ImageAtomicFAdd.vert new file mode 100644 index 0000000000..6a46a8a9d0 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/ImageAtomicFAdd.vert @@ -0,0 +1,45 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// This test is to verify that imageAtomicAdd with float type is correctly translated to the HW instruction +// image_atomic_add_flt on GFX12. 
+ +// RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +// SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +// SHADERTEST: call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.image.atomic.add.flt.1d.f32.i16.v8i32(float 1.000000e+00, i16 0, <8 x i32> %{{[0-9]*}}, i32 0, i32 0) + +// SHADERTEST-LABEL: {{^// LLPC}} final ELF info +// SHADERTEST: image_atomic_add_flt {{v[0-9]*}}, {{v[0-9]*}}, {{s[[0-9]*:[0-9]*]}} dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN a16 + +#version 450 core +#extension GL_EXT_shader_atomic_float: enable + +layout(binding = 0, r32f) uniform image1D i1D; + +void main() +{ + imageAtomicAdd(i1D, 0, 1.0); +} diff --git a/llpc/test/shaderdb/gfx12/ImageAtomicFMinMax.vert b/llpc/test/shaderdb/gfx12/ImageAtomicFMinMax.vert new file mode 100644 index 0000000000..0f99efda90 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/ImageAtomicFMinMax.vert @@ -0,0 +1,48 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// This test is to verify that imageAtomicMin/imageAtomicMax with float type is correctly translated to the HW +// instruction image_atomic_min_flt/image_atomic_max_flt on GFX12. + +// RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +// SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +// SHADERTEST: call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.image.atomic.min.flt.1d.f32.i16.v8i32(float 2.000000e+00, i16 0, <8 x i32> %{{[0-9]*}}, i32 0, i32 0) +// SHADERTEST-NEXT: call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.image.atomic.max.flt.1d.f32.i16.v8i32(float 5.000000e+00, i16 0, <8 x i32> %{{[0-9]*}}, i32 0, i32 0) + +// SHADERTEST-LABEL: {{^// LLPC}} final ELF info +// SHADERTEST: image_atomic_min_flt {{v[0-9]*}}, {{v[0-9]*}}, {{s[[0-9]*:[0-9]*]}} dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN a16 +// SHADERTEST-NEXT: image_atomic_max_flt {{v[0-9]*}}, {{v[0-9]*}}, {{s[[0-9]*:[0-9]*]}} dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN a16 + +#version 450 core +#extension GL_EXT_shader_atomic_float2: enable + +layout(binding = 0, r32f) uniform image1D i1D; + +void main() +{ + imageAtomicMin(i1D, 0, 2.0); + imageAtomicMax(i1D, 0, 5.0); +} diff --git a/llpc/test/shaderdb/gfx12/PipelineCs_16BitSBufferLoadConversion.pipe b/llpc/test/shaderdb/gfx12/PipelineCs_16BitSBufferLoadConversion.pipe new file mode 100644 index 0000000000..c2ff6161dc --- /dev/null +++ b/llpc/test/shaderdb/gfx12/PipelineCs_16BitSBufferLoadConversion.pipe @@ -0,0 +1,92 @@ +;; + 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; Check that padBufferSizeToNextDword is set correctly by llpc and it enables the conversion to s_buffer_load_u16 +; This is only possible on GFX12 if the buffer size is padded, which we do in the Vulkan driver + +; BEGIN_SHADERTEST +; RUN: amdllpc -v -gfxip=12.0.1 %s --emit-llvm | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results +; SHADERTEST: call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> [[desc:%[0-9]+]], i32 0, i32 0), !invariant.load !{{[0-9]+}} +; SHADERTEST: call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> [[desc]], i32 2, i32 0), !invariant.load !{{[0-9]+}} +; SHADERTEST: call i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32> [[desc]], i32 4, i32 0), !invariant.load !{{[0-9]+}} +; REQUIRES: do-not-run-me + +[CsGlsl] +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types: require + +layout(local_size_x = 1) in; + +struct Inputs +{ + float16_t in0; + float16_t in1; + float16_t in2; +}; + +layout(set = 0, binding = 0, std430) buffer InBuffer +{ + Inputs inputs[]; +}; +layout(set = 0, binding = 1, std430) buffer OutBuffer +{ + Inputs outputs[]; +}; + +void main (void) +{ + float16_t in0 = float16_t(inputs[0].in0); + float16_t in1 = float16_t(inputs[0].in1); + float16_t in2 = float16_t(inputs[0].in2); + + + outputs[0].in0 = in0; + outputs[0].in1 = in1; + outputs[0].in2 = in2; +} + +[CsInfo] +entryPoint = main +userDataNode[0].visibility = 128 +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 1 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[0].strideInDwords = 0 +userDataNode[0].next[1].type = DescriptorBuffer 
+userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 4 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 +userDataNode[0].next[1].strideInDwords = 0 + +[ComputePipelineState] +options.padBufferSizeToNextDword = 1 diff --git a/llpc/test/shaderdb/gfx12/PipelineRays_SetAutoCompileContinuations.pipe b/llpc/test/shaderdb/gfx12/PipelineRays_SetAutoCompileContinuations.pipe new file mode 100644 index 0000000000..522829a586 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/PipelineRays_SetAutoCompileContinuations.pipe @@ -0,0 +1,175 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; Check that the ray tracing mode option is set to auto and the pipeline is compiled in continuations mode. +; This ensures that pipelines compiled in indirect mode for GFX12 use continuations mode instead of legacy mode. + +; RUN: amdllpc -gfxip 12.0.1 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s + +; CHECK-LABEL: @_amdgpu_cs_main( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK-LABEL: @_rgen_1( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK-LABEL: @_rgen_1.resume.0( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. +; CHECK: unreachable +; CHECK: ret void + +; CHECK-LABEL: @_chit_2( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK-LABEL: @_cs_( +; CHECK: call { <10 x i32>, <3 x float>, <3 x float> } @llvm.amdgcn.image.bvh8.intersect.ray +; CHECK-NOT: ret void +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. +; CHECK-NOT: ret void + +[Version] +version = 69 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(location = 14) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 14); + + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(g_ray.color, 0)); +} + +[rgenInfo] +entryPoint = main + +[chitGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(shaderRecordEXT, std430) buffer sbt { + float z; +}; + +hitAttributeEXT vec2 
g_hit; +rayPayloadInEXT RayPayload g_ray; + +void main() { + g_ray.color.xy = g_hit; + g_ray.color.z = z; +} + +[chitInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 0xffffffff +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[1].type = DescriptorImage +userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 8 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 +userDataNode[1].visibility = 0xffffffff +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = DescriptorConstBufferCompact +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 2 +userDataNode[1].next[0].set = 0x0000005D +userDataNode[1].next[0].binding = 17 +userDataNode[1].next[1].type = DescriptorConstBuffer +userDataNode[1].next[1].offsetInDwords = 2 +userDataNode[1].next[1].sizeInDwords = 4 +userDataNode[1].next[1].set = 0x0000005D +userDataNode[1].next[1].binding = 0 +userDataNode[1].next[2].type = DescriptorBuffer +userDataNode[1].next[2].offsetInDwords = 6 +userDataNode[1].next[2].sizeInDwords = 4 +userDataNode[1].next[2].set = 0x0000005D +userDataNode[1].next[2].binding = 1 + +[RayTracingPipelineState] +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[1].closestHitShader = 1 +maxRecursionDepth = 1 +indirectStageMask = 0xffffffff +mode = 0 +rtState.bvhResDescSize = 4 
+rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2164261887 +rtState.nodeStrideShift = 7 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +payloadSizeMaxInLib = 12 +attributeSizeMaxInLib = 8 +hasPipelineLibrary = 0 diff --git a/llpc/test/shaderdb/gfx12/SubgroupQuadBroadcast.frag b/llpc/test/shaderdb/gfx12/SubgroupQuadBroadcast.frag new file mode 100644 index 0000000000..9503ff2995 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/SubgroupQuadBroadcast.frag @@ -0,0 +1,81 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +// RUN: amdllpc -o - -gfxip 12.0.1 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s +#version 450 + +#extension GL_KHR_shader_subgroup_quad : require + +layout(binding = 0) readonly buffer Block0 +{ + float alpha[]; +}; + +layout(location = 0) out vec4 color; + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + float v = alpha[coord.y * 2 + coord.x]; + + vec4 lanes; + for (uint i = 0; i < 4; ++i) { + lanes[i] = subgroupQuadBroadcast(v, i); + } + + color = lanes; +} +// CHECK-LABEL: amdgpu_ps_main: +// CHECK: s_mov_b64 s[4:5], exec +// CHECK-NEXT: s_wqm_b64 exec, exec +// CHECK-NEXT: s_getpc_b64 s[2:3] +// CHECK-NEXT: s_mov_b32 s0, s1 +// CHECK-NEXT: s_sext_i32_i16 s3, s3 +// CHECK-NEXT: v_cvt_i32_f32_e32 v0, v2 +// CHECK-NEXT: s_mov_b32 s1, s3 +// CHECK-NEXT: v_cvt_i32_f32_e32 v1, v3 +// CHECK-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +// CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +// CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +// CHECK-NEXT: v_lshl_add_u32 v0, v1, 3, v0 +// CHECK-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 +// CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +// CHECK-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1 +// CHECK-NEXT: v_and_b32_e32 v2, 0x7c, v1 +// CHECK-NEXT: v_or_b32_e32 v1, 3, v1 +// 
CHECK-NEXT: s_wait_kmcnt 0x0 +// CHECK-NEXT: buffer_load_b32 v0, v0, s[0:3], null offen +// CHECK-NEXT: v_or_b32_e32 v3, 1, v2 +// CHECK-NEXT: v_or_b32_e32 v4, 2, v2 +// CHECK-NEXT: s_wait_loadcnt 0x0 +// CHECK-NEXT: v_permlane16_var_b32 v2, v0, v2 op_sel:[0,1] +// CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +// CHECK-NEXT: v_permlane16_var_b32 v3, v0, v3 op_sel:[0,1] +// CHECK-NEXT: v_permlane16_var_b32 v4, v0, v4 op_sel:[0,1] +// CHECK-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1] +// CHECK-NEXT: s_and_b64 exec, exec, s[4:5] +// CHECK-NEXT: export mrt0 v2, v3, v4, v0 done +// CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/gfx12/TuningTemporalHints.pipe b/llpc/test/shaderdb/gfx12/TuningTemporalHints.pipe new file mode 100644 index 0000000000..1125ccd228 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/TuningTemporalHints.pipe @@ -0,0 +1,135 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function amdgpu_gs_main +; RUN: amdllpc -gfxip 12.0.1 -filetype=asm -o - %s | FileCheck -check-prefix=CHECK %s +; Test tuning temporal hints: +; options.temporalHintControl=0xb represents that ATM temporal hint is TH_STORE_RT_WB. +[Version] +version = 72 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 inPos; +layout(location = 1) in vec4 inColor; +layout(location = 0) out vec4 oColor; + +void main() { + gl_Position = inPos; + oColor = inColor; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) in vec4 oColor; +layout(location = 0) out vec4 outColor; + +void main() { + outColor = oColor; +} + +[FsInfo] +entryPoint = main + +[GraphicsPipelineState] +colorBuffer[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +options.temporalHintControl = 0xb + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 32 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +attribute[1].location = 1 +attribute[1].binding = 0 +attribute[1].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[1].offset = 16 + +; CHECK-LABEL: amdgpu_gs_main: +; CHECK: s_mov_b64 exec, -1 +; CHECK-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 +; CHECK-NEXT: s_lshr_b32 s0, s3, 18 +; CHECK-NEXT: s_bfe_u32 s3, s2, 0x9000c +; CHECK-NEXT: s_and_b32 s0, s0, 0x3c0 +; CHECK-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1 +; CHECK-NEXT: v_add_nc_u32_e32 v9, s0, v1 +; CHECK-NEXT: s_mov_b64 s[0:1], exec +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_cmpx_gt_u32_e64 s3, v9 +; CHECK-NEXT: s_cbranch_execz .LBB0_2 +; CHECK-NEXT: s_getpc_b64 s[12:13] +; CHECK-NEXT: s_mov_b32 s6, s9 +; CHECK-NEXT: s_sext_i32_i16 s13, s13 +; CHECK-NEXT: v_add_nc_u32_e32 v5, s10, v3 +; CHECK-NEXT: s_mov_b32 s7, s13 +; CHECK-NEXT: s_load_b128 s[12:15], s[6:7], 0x0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: s_clause 0x1 +; CHECK-NEXT: tbuffer_load_format_xyzw v[1:4], v5, s[12:15], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen +; CHECK-NEXT: tbuffer_load_format_xyzw v[5:8], v5, s[12:15], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:16 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] +; CHECK-NEXT: s_bfe_u32 s0, s2, 0x90016 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s0, v9 +; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc +; CHECK-NEXT: s_cbranch_execz .LBB0_4 +; CHECK-NEXT: export prim v0, off, off, off done +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: s_wait_expcnt 0x0 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: s_mov_b64 s[0:1], exec +; CHECK-NEXT: v_cmpx_gt_u32_e64 s3, v9 +; CHECK-NEXT: s_cbranch_execz .LBB0_6 +; CHECK-NEXT: s_getpc_b64 s[0:1] +; CHECK-NEXT: s_lshl_b32 s4, s5, 9 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_sext_i32_i16 s1, s1 +; CHECK-NEXT: s_and_b32 s4, s4, 0xfffe00 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_mov_b32 s9, s1 +; CHECK-NEXT: s_load_b128 s[0:3], s[8:9], 0xb0 +; CHECK-NEXT: s_wait_loadcnt 0x1 +; CHECK-NEXT: export pos0 v1, v2, v3, v4 done +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: buffer_store_b128 v[5:8], v9, s[0:3], s4 idxen 
th:TH_STORE_RT_WB scope:SCOPE_DEV +; CHECK-NEXT: .LBB0_6: +; CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/gfx12/WorkgroupRoundRobin.pipe b/llpc/test/shaderdb/gfx12/WorkgroupRoundRobin.pipe new file mode 100644 index 0000000000..fceb097ce0 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/WorkgroupRoundRobin.pipe @@ -0,0 +1,96 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; RUN: amdllpc -v -gfxip=12.0.1 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: {{^// LLPC}} final ELF info +; SHADERTEST: .gs: { +; SHADERTEST: .wg_round_robin: 1 +; SHADERTEST: .ps: { +; SHADERTEST: .wg_round_robin: 1 + + +[VsGlsl] +#version 450 + +layout( location = 0 ) in vec4 app_position; + +void main() { + gl_Position = app_position; +} + +[VsInfo] +entryPoint = main +options.workgroupRoundRobin = 1 + +[FsGlsl] +#version 450 + +layout( location = 0 ) out vec4 frag_color; + +layout( push_constant ) uniform ColorBlock { + vec4 Color; +} PushConstant; + +void main() { + frag_color = PushConstant.Color; +} + +[FsInfo] +entryPoint = main +options.workgroupRoundRobin = 1 + +[ResourceMapping] +userDataNode[0].visibility = 2 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 4 +userDataNode[1].visibility = 66 +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = PushConst +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 4 +userDataNode[1].next[0].set = 0xFFFFFFFF +userDataNode[1].next[0].binding = 0 + +[GraphicsPipelineState] +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 0 +options.resourceLayoutScheme = Indirect + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 12 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32_SFLOAT +attribute[0].offset = 0 diff --git a/llpc/test/shaderdb/gfx12/lit.local.cfg b/llpc/test/shaderdb/gfx12/lit.local.cfg new file mode 100644 index 
0000000000..a8e64f58e9 --- /dev/null +++ b/llpc/test/shaderdb/gfx12/lit.local.cfg @@ -0,0 +1,36 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + +if "llpc_build_gfx12" not in config.available_features: + config.unsupported = True + +# overwrite %gfxip in config.substitutions +config.gfxip = '-gfxip=12.0.1' + +index = 0; +for substitution in config.substitutions : + if substitution[0] == '%gfxip' : + config.substitutions[index] = ('%gfxip', config.gfxip); + index += 1; diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe index 3717ab3036..b7cc7915c4 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe index 795bc5779c..f3699ced94 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe index 2826355cb6..22e103dcef 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe index 6b7b337d94..a32b3634d5 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm index 1a54d80d83..58c4341f7e 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm index 0788c1139b..350a2539e6 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm index 27df03360b..1062ec4cbe 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm index 239a308efb..82c4a2b95c 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm index fbcdee0a1f..7351f6c96d 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm b/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm index 999e86a8e1..c4a50baf6c 100644 --- a/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm +++ b/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag index 860afb3530..ea6a00fde7 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag index 2516098fb2..28df1fb217 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe index 1e3e53eb5a..1c21d229ab 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe index 8b5d7f8e42..61df3f3ddb 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe index 255a8d2193..bfb8654216 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert index 8513469a20..b619ea5f78 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert index b3a64dcb4d..1dad304c8a 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag index 89d0db6fc5..0384d8d738 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag b/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag index aee18962a4..961e74e6e1 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag index 46917fecca..34e7fd1527 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert index f97df8cd53..83139b3eeb 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert index 928854cf5a..3dc9337ca0 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag index a958775c86..5041168f1f 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag index 416958cd86..770bc1373f 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag index eb100b51fd..8466f3360d 100644 --- a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag +++ b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag index 4d16acf021..684745c376 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert index 4031d8133d..00eb1535ff 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag index f0ac07778f..d880e0f777 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag index 119572c8c6..c58cdab90f 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag index 9281505450..24a0c4351b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag index 9caf08e7d2..ee4c4c8100 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag index 64b4633003..087bc514fb 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag index 15b21e10cf..ac754abc2b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag index 64de19aa38..c4e881c9e4 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag index 72971ee743..61a9af183c 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag index a8199016d5..f13a647f3e 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag index 8d71331f9d..d3cc8dd7a9 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm b/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm index fe184d361e..86265655bc 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag index 85461e2cd2..edb622112c 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag index d291084a0a..3fb1734914 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm b/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm index eef988cc73..689550c3c7 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom index 57b857127b..014f61b718 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom index 5be0569033..db67ae387a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom index fde8c71bee..d6b65219ec 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag index 49740b37ba..298c3686eb 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function _amdgpu_ps_main diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc index 41e42a192a..2b1b073b56 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc index bb1353b3b1..9bcbd1a71d 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc index 6117cac882..aa91b2c725 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm index ad8c75faec..e2afad328a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm b/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm index d5709533e1..9bf5c1ec20 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese index 2bb122babe..13b4ce2f02 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese index 8bb5d99a78..804daec58a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese index ee730094c3..229964bcd6 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese index 0df785ec09..e75fc7ae61 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese index 95bc59ef4c..112ea68d84 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese index 6a1260df07..623caf6012 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert index ccea90ad2c..a297640d3b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert index 4edf368ee6..47ab3c97ea 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert index 8dccc61122..5441577fff 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert index f1f55d3ac5..a63edadb73 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert b/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert index c8c09ca6f1..da8dbb57e7 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert b/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert index 1f0a675973..00492abb99 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert index 0cdfeac69a..d9f51c5fa9 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag index 2c7455aa27..19c7a648d0 100644 --- a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag +++ b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ #extension GL_EXT_nonuniform_qualifier : require diff --git a/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm b/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm index 6a2522dbc1..71beb495ff 100644 --- a/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm +++ b/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag index 54b2d35e2a..97d73bc558 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag index dc91ddb7fe..eb5ada0f59 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag index bc5e119bcc..19eda0454a 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag index a7d772c204..6e8af93b4d 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag index 49a3950b47..959997bfea 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom index c8536c30a4..02178b2a38 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom index 9fed0f65bb..88884fbdc2 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom index e594e776b6..3a89fac8a9 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag b/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag index f721e89d86..440e40766e 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag b/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag index 5f482c396a..fe3541c3bb 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc index 15964efe1b..13cf7bb8d5 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc index e144ee2991..1da7186cb4 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc index b5c0f705dc..8bae6830be 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc index b7b1cae4d6..f0ef5aa0cd 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 5) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc index ce3c2e54bb..bfc923e8ee 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 5) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc index c6b3e00ce9..7aa54c1b8f 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese index 3e15ea9682..c47577c3ce 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese index a516d203c5..a6e9ec93e6 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ #extension GL_ARB_shader_viewport_layer_array: enable diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese index 57e615e0f1..f1d936c089 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese @@ -1,5 +1,28 @@ #version 450 core -/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert index d85f3931f0..510907b4f8 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert index 1bd691a153..82d18151c2 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert index 9c19a1bfd1..59a7e147e9 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert index 2984e7e6dd..1b0bea4a2c 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert index 028d11a958..b9a75def17 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert index d665c36dcf..47e0e9a6d0 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert index 3e983de739..24ff884bb0 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert index 77a2b7eb11..8e3b0ba62a 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert index f46f62df67..cba0bb5f11 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert index 2ebc71674e..55f096fef9 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert index 0be3d5e3fd..1db7a57bb2 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert index ec9067231d..0b2981c5ee 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert index 30fc4ee97b..347ddac1cf 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert index bcfb946d21..c06ed2030d 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag index b900f63a72..f27d33d913 100644 --- a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm b/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm index 01b9b65feb..037423e24b 100644 --- a/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm +++ b/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag index 2f88741339..729f9dc660 100644 --- a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag +++ b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag index 0fde0ca6c8..de26787d88 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag index 4b41ad0f0c..1e3d26fa76 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag index b4c191502d..5b6acbbc94 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag index 3165a77d5e..a5790999e1 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag index 9d9ff7078c..a99529adaf 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag index 2d7429b348..c803c01997 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert index 2dfe798b45..63232dd3a0 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag index dd7631d689..b87bec688d 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert index c8eda55ef1..cc87ce1314 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag index 72b9190cb9..469965ea3f 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag index 1e6593dce5..a2b8ff166f 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert index e9954c075e..e32425edcb 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert index 854b695ee8..376b1f6dc3 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert index 3f013c5f50..fe583891d8 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert index d592bf402e..85a10f0652 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert index 4b61e9ac96..9b8748bdb8 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert index d5e16288c1..57fed12774 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert index 47b6655f1d..a998b16c5a 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag index ad58d4e4a7..a8ce1571d6 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag index 8c890cfb6b..b2c61f777e 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert index eaca7cdb1d..17e161a340 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert index 17b0d06cdd..05f03e42ef 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert index 542e4fe013..2abb8abcb3 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert index 9d28d89511..4fb1a6a721 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag index bbc85d7998..cb30479bab 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert index 25d065aa7e..f55c82962f 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm index db4d8820fb..f9c3abd760 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert index 8ff3e9e287..e614eeab46 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag index f9dcccf2a3..f0df3bb2eb 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag index 36b4671e78..b0f07e6563 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag index 7b31c452db..1ff3f95e76 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag index fd900d5774..8ae95fc8ca 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert index e3f593e46a..2839060392 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert index e94063a772..a2806ddc56 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert index f0f2c6996b..3a8fa106ac 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert index a9295b832c..afea1a1810 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert index 93f2702cf5..cda2395f68 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert index 9152917971..e48de7e77c 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag index 4ead173d58..4f908dbcb5 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert index 18a98163d3..86fd82cdcb 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert index 63d51bfeca..5f05183a7b 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert index 375fafb545..b5b493745a 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag index 3a85962e53..cb50d221b7 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert index 7879cf7bc5..18d7d3d36f 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag index 36c342fcde..c2a1126faf 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert index a5a58d951e..c6f7e50104 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert index 4b1d1388d0..9741b5b58b 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert index 013a2c43ad..f454ffa5e2 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag index 7d8afaf2ff..74efb63022 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag b/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag index 6dea238616..4e6c459061 100644 --- a/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag b/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag index 279b698686..70156fea47 100644 --- a/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert b/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert index 9dff922fda..da580b557c 100644 --- a/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert @@ -4,6 +4,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe index 0c979a0932..e47115fa30 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe index 07a0b7bfc2..c7ac911efa 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe index 5709725642..c649eaf16a 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe index 6de60c0401..26100ee7c5 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe index af09e0fa67..98b140a859 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe index 10decef4eb..b17bd947f0 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe index 42c26dad85..ee85f22d77 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe index 4068cf80af..46c7a849dc 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe index 1950c9c6b2..ca445d1400 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe index 7f2d7d042b..ea1c206d35 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe index b4f9664954..8d5360adbf 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe index 43eb7eadee..e787e766d3 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe index 702bc4fbd0..7966c3b3c4 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe index 451d398fc2..4cd7b43f1a 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/TestContState.rchit b/llpc/test/shaderdb/ray_tracing/TestContState.rchit index dfd2525334..55724bc42e 100644 --- a/llpc/test/shaderdb/ray_tracing/TestContState.rchit +++ b/llpc/test/shaderdb/ray_tracing/TestContState.rchit @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/ray_tracing/TestContState.rgen b/llpc/test/shaderdb/ray_tracing/TestContState.rgen index 473c77e04b..248b8dbb90 100644 --- a/llpc/test/shaderdb/ray_tracing/TestContState.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestContState.rgen @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ diff --git a/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint b/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint index 8bc18fd0e3..940f6e158f 100644 --- a/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint +++ b/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe index 6905984856..692f2c215a 100644 --- a/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe +++ b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen b/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen index 6613c0832b..592c3e528d 100644 --- a/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // NOTE : Do not autogenerate diff --git a/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen index a007c84b90..932802f559 100644 --- a/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // RUN: amdllpc %gfxip --print-after=lower-gpurt-library 2>&1 %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rahit b/llpc/test/shaderdb/ray_tracing/standalone.rahit index 4dad340daa..544ab29438 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rahit +++ b/llpc/test/shaderdb/ray_tracing/standalone.rahit @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rcall b/llpc/test/shaderdb/ray_tracing/standalone.rcall index 135a14bd47..dc6482d4a0 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rcall +++ b/llpc/test/shaderdb/ray_tracing/standalone.rcall @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rchit b/llpc/test/shaderdb/ray_tracing/standalone.rchit index f43ba71fb2..fccf819d80 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rchit +++ b/llpc/test/shaderdb/ray_tracing/standalone.rchit @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rgen b/llpc/test/shaderdb/ray_tracing/standalone.rgen index 7c740b34da..1dee612390 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rgen +++ b/llpc/test/shaderdb/ray_tracing/standalone.rgen @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rint b/llpc/test/shaderdb/ray_tracing/standalone.rint index b8f6a5e4e6..f21368bd27 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rint +++ b/llpc/test/shaderdb/ray_tracing/standalone.rint @@ -3,6 +3,24 @@ * * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * **********************************************************************************************************************/ // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm b/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm index f42bce8124..55ed3fbe78 100644 --- a/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe index ac7f9b1e83..81e8be8b13 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe index 94273b8090..2385196c03 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe index 81d68b3dc9..29d83464b2 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe index c81dbc7ca9..f2069dedb8 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe index bd1be80685..c62e83353c 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe index cf1ffd5d3c..83ab78f156 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe index d2d7eabefd..b445224667 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe index 31543b9db1..f960e43aa4 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe index d6857a1c76..fab199b641 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe index 1024224031..8fe38b820c 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe index 3a0f78100b..27a9150570 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe index ba1de3beb1..c04ea7b12e 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe index 8f0d6c14ac..88a60eafe5 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe index fe347f1aeb..3bbb9fa6a4 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe index 750574a6a5..1ee3b7a354 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe index a8c486b783..ca98aceb2d 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe index 66b301c593..3900c5f548 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe index c476e6e703..d20f9ede9a 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe index 15a064f169..3e30812fde 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe index 61951a3f52..3f6dbe0d19 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm b/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm index 46a19af8e4..eb3e8e91de 100644 --- a/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm b/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm index f367342bf5..4c55f328fb 100644 --- a/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm @@ -3,6 +3,24 @@ ; ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe index 56a702c730..a7f5a7d7a6 100644 --- a/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe +++ b/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe @@ -5,7 +5,30 @@ ; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s -;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [Version] diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe index eb9dfb6bbc..e101099833 100644 --- a/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe @@ -5,7 +5,30 @@ ; RUN: amdllpc -enable-relocatable-shader-elf -filetype=asm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s -;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [Version] diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe index 3200056da7..3ad14247a9 100644 --- a/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe +++ b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe @@ -5,7 +5,30 @@ ; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s -;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; [Version] diff --git a/llpc/translator/lib/SPIRV/SPIRVInternal.h b/llpc/translator/lib/SPIRV/SPIRVInternal.h index 2452d3b56c..12c5799f63 100644 --- a/llpc/translator/lib/SPIRV/SPIRVInternal.h +++ b/llpc/translator/lib/SPIRV/SPIRVInternal.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -220,6 +220,9 @@ const static char InOut[] = "spirv.InOut"; const static char Block[] = "spirv.Block"; const static char PushConst[] = "spirv.PushConst"; const static char Resource[] = "spirv.Resource"; +#if LLPC_BUILD_GFX12 +const static char ResourceNoAlloc[] = "spirv.Resource.NoAlloc"; +#endif const static char TaskPayload[] = "spirv.TaskPayload"; const static char UniformConstant[] = "spirv.UniformConstant"; const static char ExecutionModel[] = "spirv.ExecutionModel"; diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index f4b653f84a..039112dec3 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -292,6 +292,23 @@ static Value *vectorCompositeConstruct(Type *vecTy, const std::vector & return v; } +#if LLPC_BUILD_GFX12 +static bool isNoAllocResource(unsigned descSet, unsigned binding, const PipelineShaderOptions *shaderOption) { + Vkgc::CachePolicyLlc::NoAllocResource noAllocResource = {}; + noAllocResource.set = descSet; + noAllocResource.binding = binding; + + Vkgc::CachePolicyLlc::NoAllocResource resourceTuning = {}; + for (unsigned i = 0; i < 
shaderOption->cachePolicyLlc.resourceCount; i++) { + resourceTuning.u32All = shaderOption->cachePolicyLlc.noAllocs[i]; + if (resourceTuning.resourceId == noAllocResource.resourceId) + if (resourceTuning.noAlloc) + return true; + } + return false; +} +#endif + bool SPIRVToLLVM::isStorageClassExplicitlyLaidOut(SPIRVStorageClassKind storageClass) { return llvm::is_contained({StorageClassStorageBuffer, StorageClassUniform, StorageClassPushConstant, StorageClassPhysicalStorageBufferEXT}, @@ -1012,6 +1029,9 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *co return lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, matrixLayout, kSize); } +#if LLPC_BUILD_GFX12 +#endif + // ===================================================================================================================== // Get pointee type from SPIRV Value. // @@ -1180,6 +1200,8 @@ Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool colum case OpTypeCooperativeMatrixKHR: { return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } +#if LLPC_BUILD_GFX12 +#endif default: { llvm_unreachable("Not implemented"); } @@ -5402,10 +5424,25 @@ SPIRVToLLVM::getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, lgc::xdl::C [[maybe_unused]] const Vkgc::GfxIpVersion gfxIp = getPipelineContext()->getGfxIpVersion(); if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAKHR || use == CooperativeMatrixUse::CooperativeMatrixUseMatrixBKHR) { +#if LLPC_BUILD_GFX12 + if (gfxIp.major == 12) { + if (lgc::xdl::isTypeNCooperativeMatrix(elemType, 16) || lgc::xdl::isTypeNCooperativeMatrix(elemType, 8)) { + if (rows == 16 && columns == 16) + return lgc::xdl::CooperativeMatrixLayout::Gfx12BaseLayout; + return lgc::xdl::CooperativeMatrixLayout::Gfx12SwizzledKX16Layout; + } + llvm_unreachable("Invalid element type!"); + return lgc::xdl::CooperativeMatrixLayout::InvalidLayout; + } +#endif return lgc::xdl::CooperativeMatrixLayout::FactorMatrixLayout; } if (use == 
CooperativeMatrixUse::CooperativeMatrixUseMatrixAccumulatorKHR) { +#if LLPC_BUILD_GFX12 + if (gfxIp.major == 12) + return lgc::xdl::CooperativeMatrixLayout::Gfx12BaseLayout; +#endif if (gfxIp.major == 11) return lgc::xdl::CooperativeMatrixLayout::AccumulatorMatrixLayout; if (lgc::xdl::isTypeNCooperativeMatrix(elemType, 32)) @@ -5648,6 +5685,12 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetGfxIpVersion(); unsigned kMultiplier = 1; +#if LLPC_BUILD_GFX12 + if (gfxIp.major == 12) { + if (lgc::xdl::isTypeNCooperativeMatrix(elemBasicTypeA, 4) && isa(coopMatrixB->getType())) + kMultiplier = 2; + } +#endif Type *coopMatrixDType = coopMatrixC->getType(); lgc::xdl::CooperativeMatrixElementType elemBasicTypeD = elemBasicTypeC; @@ -5677,6 +5720,9 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SP return getBuilder()->create(rayQuery, accStru, rayFlags, mask, origin, tmin, dir, tmax); } +#if LLPC_BUILD_GFX12 +#endif + /// For instructions, this function assumes they are created in order /// and appended to the given basic block. An instruction may use a /// instruction from another BB which has not been translated. 
Such @@ -7391,6 +7437,16 @@ static void scanImageDescNonUniformCV(SPIRVToLLVM::ExtractedImageInfo *info, SPI info->flags |= lgc::Builder::ImageFlagCoherent; if (spvValue->hasDecorate(DecorationVolatile)) info->flags |= lgc::Builder::ImageFlagVolatile; +#if LLPC_BUILD_GFX12 + if (spvValue->hasDecorate(DecorationBinding)) { + SPIRVWord binding = SPIRVID_INVALID; + unsigned descSet = 0; + spvValue->hasDecorate(DecorationBinding, 0, &binding); + spvValue->hasDecorate(DecorationDescriptorSet, 0, &descSet); + if (isNoAllocResource(descSet, binding, shaderOption)) + info->flags |= lgc::Builder::ImageFlagLlcNoAlloc; + } +#endif } const auto opcode = spvValue->getOpCode(); @@ -9409,6 +9465,12 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { if (!gv->hasMetadata(gSPIRVMD::Resource)) gv->addMetadata(gSPIRVMD::Resource, *resMdNode); +#if LLPC_BUILD_GFX12 + if (!gv->hasMetadata(gSPIRVMD::ResourceNoAlloc)) + if (isNoAllocResource(descSet, binding, m_shaderOptions)) + gv->addMetadata(gSPIRVMD::ResourceNoAlloc, *MDNode::get(*m_context, {})); +#endif + // Build block metadata const bool isUniformBlock = bv->getType()->getPointerStorageClass() != StorageClassStorageBuffer && blockTy->hasDecorate(DecorationBlock); diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h index e3cc0b9f3b..ba5a158c24 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -211,6 +211,8 @@ template <> inline void SPIRVMap::init() { ADD_VEC_INIT(CapabilityMeshShadingEXT, {CapabilityShader}); ADD_VEC_INIT(CapabilityFragmentBarycentricKHR, {CapabilityShader}); ADD_VEC_INIT(CapabilityCooperativeMatrixKHR, {CapabilityShader}); +#if LLPC_BUILD_GFX12 +#endif ADD_VEC_INIT(CapabilityComputeDerivativeGroupLinearNV, {CapabilityShader}); ADD_VEC_INIT(CapabilityComputeDerivativeGroupQuadsNV, {CapabilityShader}); ADD_VEC_INIT(CapabilityComputeDerivativeGroupLinearKHR, {CapabilityShader}); diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h index e6659102c7..ba9fd9073b 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -2802,6 +2802,9 @@ _SPIRV_OP(AssumeTrueKHR, false, 2, false) _SPIRV_OP(ExpectKHR, true, 5, false) #undef _SPIRV_OP +#if LLPC_BUILD_GFX12 +#endif + } // namespace SPIRV #endif diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index ba195f5c27..76c0347787 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -510,6 +510,8 @@ template <> inline void SPIRVMap::init() { add(CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR, "WorkgroupMemoryExplicitLayout8BitAccessKHR"); add(CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR, "WorkgroupMemoryExplicitLayout16BitAccessKHR"); add(CapabilityCooperativeMatrixKHR, "CooperativeMatrixKHR"); +#if LLPC_BUILD_GFX12 +#endif add(CapabilityComputeDerivativeGroupLinearNV, "ComputeDerivativeGroupLinearNV"); add(CapabilityComputeDerivativeGroupQuadsNV, "ComputeDerivativeGroupQuadsNV"); add(CapabilityComputeDerivativeGroupLinearKHR, "ComputeDerivativeGroupLinearKHR"); diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h index fcbfbc6547..ee12e1a5b9 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h @@ -339,6 +339,8 @@ _SPIRV_OP(CooperativeMatrixLoadKHR, 4457) _SPIRV_OP(CooperativeMatrixStoreKHR, 4458) _SPIRV_OP(CooperativeMatrixMulAddKHR, 4459) _SPIRV_OP(CooperativeMatrixLengthKHR, 4460) +#if LLPC_BUILD_GFX12 +#endif _SPIRV_OP(BeginInvocationInterlockEXT, 5364) _SPIRV_OP(EndInvocationInterlockEXT, 5365) _SPIRV_OP(DemoteToHelperInvocationEXT, 5380) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp index d2370d990d..203ad65af4 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -222,6 +222,9 @@ uint32_t SPIRVType::getCooperativeMatrixKHRUse() const { return static_cast(this)->getUse()->getZExtIntValue(); } +#if LLPC_BUILD_GFX12 +#endif + bool SPIRVType::isTypeVoid() const { return OpCode == OpTypeVoid; } @@ -298,6 +301,9 @@ bool SPIRVType::isTypeCooperativeMatrixKHR() const { return OpCode == OpTypeCooperativeMatrixKHR; } +#if LLPC_BUILD_GFX12 +#endif + void SPIRVTypeFloat::decode(std::istream &I) { getDecoder(I) >> (Id) >> (BitWidth); if (WordCount > FixedWC) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h index cf89b5f8ee..7b4f3bdd1a 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVType.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -109,6 +109,8 @@ class SPIRVType : public SPIRVEntry { bool isTypeAccelerationStructureKHR() const; bool isTypeRayQueryKHR() const; bool isTypeCooperativeMatrixKHR() const; +#if LLPC_BUILD_GFX12 +#endif }; class SPIRVTypeVoid : public SPIRVType { @@ -700,6 +702,9 @@ class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { SPIRVId CompIntp; // Specifies how Component Type is interpreted }; +#if LLPC_BUILD_GFX12 +#endif + template bool isType(const T1 *Ty, unsigned Bits = 0) { bool Is = Ty->getOpCode() == T2::OC; if (!Is) diff --git a/llpc/unittests/util/testPipelineDumper.cpp b/llpc/unittests/util/testPipelineDumper.cpp index 997c1df918..1237fb39c4 100644 --- a/llpc/unittests/util/testPipelineDumper.cpp +++ b/llpc/unittests/util/testPipelineDumper.cpp @@ -397,5 +397,27 @@ TEST(PipelineDumperTest, TestForceNonUniformResourceIndexStageMaskCompute) { runComputePipelineVariations(modifyBuildInfo, expectHashToBeEqual); } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Test the expertSchedulingMode option. 
+ +TEST(PipelineDumperTest, TestExpertSchedulingModeGraphics) { + ModifyGraphicsBuildInfo modifyBuildInfo = [](GraphicsPipelineBuildInfo *buildInfo) { + buildInfo->options.expertSchedulingMode = true; + }; + + HashModifiedFunc expectHashToBeEqual = [](const GenerateHashParams &params) { return false; }; + runGraphicsPipelineVariations(modifyBuildInfo, expectHashToBeEqual); +} + +TEST(PipelineDumperTest, TestExpertSchedulingModeCompute) { + ModifyComputeBuildInfo modifyBuildInfo = [](ComputePipelineBuildInfo *buildInfo) { + buildInfo->options.expertSchedulingMode = true; + }; + HashModifiedFunc expectHashToBeEqual = [](const GenerateHashParams &params) { return false; }; + runComputePipelineVariations(modifyBuildInfo, expectHashToBeEqual); +} +#endif + } // namespace } // namespace Llpc diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.h b/llvmraytracing/include/lgc/LgcCpsDialect.h index 79f6eeed02..db74909898 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.h +++ b/llvmraytracing/include/lgc/LgcCpsDialect.h @@ -74,4 +74,8 @@ CpsSchedulingLevel getCpsLevelForShaderStage(lgc::rt::RayTracingShaderStage stag uint8_t getPotentialCpsReturnLevels(lgc::rt::RayTracingShaderStage stage); llvm::Value *lowerAsContinuationReference(llvm::IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCROp, llvm::Value *Relocation = nullptr); +#if LLPC_BUILD_GFX12 +void setMaxOutgoingVgprCount(llvm::Function &fn, unsigned maxOutgoingVgprCount); +std::optional tryGetMaxOutgoingVgprCount(const llvm::Function &fn); +#endif } // namespace lgc::cps diff --git a/llvmraytracing/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp index 1c085c8d90..b5b026e2b5 100644 --- a/llvmraytracing/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -90,6 +90,22 @@ Value *DXILContPostProcessPassImpl::ensure64BitAddr(Value *Src) { Value *Addr64 = Builder.CreateZExt(Src, I64); Addr64 = Builder.CreateAnd(Addr64, 0xFFFFFFC0); +#if LLPC_BUILD_GFX12 + //
Extract the dVGPR requirements and priority, encode it in the target VPC + // vgprCount = (((vpc32 >> 3) & 0x7) + 1) * 16 + // vpc64 |= vgprCount << 32 + // Rewritten as: + // vgprCount = ((vpc32 & 0x38) << 1) + 16 + // vpc64 |= vgprCount << 32 + constexpr static uint32_t VgprBlockSize = 16; + Value *VgprBlockCountTmp1 = Builder.CreateAnd(Src, 0x38); + Value *VgprBlockCountTmp2 = Builder.CreateShl(VgprBlockCountTmp1, 1); + Value *VgprCount = Builder.CreateAdd(VgprBlockCountTmp2, Builder.getInt32(VgprBlockSize)); + VgprCount = Builder.CreateZExt(VgprCount, I64); + VgprCount = Builder.CreateShl(VgprCount, 32); + Addr64 = Builder.CreateOr(Addr64, VgprCount); +#endif + Value *Priority = Builder.CreateAnd(Src, Builder.getInt32(0x7)); // firstMetadataBit = 32 // firstPriorityBitInMetadata = 16 diff --git a/llvmraytracing/lib/LgcCpsDialect.cpp b/llvmraytracing/lib/LgcCpsDialect.cpp index 279c8b1c9c..1b97ba6a59 100644 --- a/llvmraytracing/lib/LgcCpsDialect.cpp +++ b/llvmraytracing/lib/LgcCpsDialect.cpp @@ -45,6 +45,10 @@ using namespace lgc::rt; constexpr const char CpsMetadata[] = "lgc.cps"; constexpr const char CpsMaxArgumentVgprsMetadata[] = "lgc.cps.maxArgumentVgprs"; +#if LLPC_BUILD_GFX12 +constexpr const char CpsMaxOutgoingVgprCountMetadata[] = "lgc.cps.maxOutgoingVgprCount"; +#endif + // ===================================================================================================================== // Helper to determine how many dwords we require to store a variable of a given // type. Note that this does not include any padding except for pointers. @@ -249,3 +253,30 @@ Value *lgc::cps::lowerAsContinuationReference(IRBuilder<> &Builder, lgc::cps::As return Reference; } + +#if LLPC_BUILD_GFX12 +// ==================================================================================================================== +// Sets max outgoing VGPR count metadata. 
+void lgc::cps::setMaxOutgoingVgprCount(Function &fn, unsigned maxOutgoingVgpr) { + LLVMContext &context = fn.getContext(); + MDNode *node = + MDNode::get(context, {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(context), maxOutgoingVgpr))}); + fn.setMetadata(CpsMaxOutgoingVgprCountMetadata, node); +} + +// ===================================================================================================================== +// Returns the max outgoing VGPR count of a function. Returns std::nullopt if +// not set. +// If this metadata is set, it means that this function will write the number +// into an SGPR; if not, it means that this function will read the number from +// an input SGPR. +std::optional lgc::cps::tryGetMaxOutgoingVgprCount(const Function &fn) { + MDNode *node = fn.getMetadata(fn.getContext().getMDKindID(CpsMaxOutgoingVgprCountMetadata)); + if (!node) { + return std::nullopt; + } + + const ConstantAsMetadata *c = cast(node->getOperand(0)); + return cast(c->getValue())->getZExtValue(); +} +#endif diff --git a/llvmraytracing/plugin/CMakeLists.txt b/llvmraytracing/plugin/CMakeLists.txt index ea7c12f660..f3409cbb24 100644 --- a/llvmraytracing/plugin/CMakeLists.txt +++ b/llvmraytracing/plugin/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to diff --git a/sharedme/xdl/CMakeLists.txt b/sharedme/xdl/CMakeLists.txt index 95821b8e11..8d3eb6e257 100644 --- a/sharedme/xdl/CMakeLists.txt +++ b/sharedme/xdl/CMakeLists.txt @@ -58,6 +58,13 @@ set_compiler_options(sharedme_xdl) set(SHAREDME_XDL_TABLEGEN_DEFINES) +#if LLPC_BUILD_GFX12 +if(LLPC_BUILD_GFX12) + target_compile_definitions(sharedme_xdl PUBLIC LLPC_BUILD_GFX12=1) + list(APPEND SHAREDME_XDL_TABLEGEN_DEFINES -DLLPC_BUILD_GFX12) +endif() +#endif + # TableGen for dialects include("${LLPC_SOURCE_DIR}/cmake/DialectsTablegen.cmake") set_dialects_tablegen_exe(SHAREDME_XDL) diff --git a/sharedme/xdl/include/lgc/LgcXdlDialect.td b/sharedme/xdl/include/lgc/LgcXdlDialect.td index 4b05dfee89..9101630db0 100644 --- a/sharedme/xdl/include/lgc/LgcXdlDialect.td +++ b/sharedme/xdl/include/lgc/LgcXdlDialect.td @@ -60,3 +60,6 @@ class LgcXdlIntrinOp traits_ = []> include "lgc/CooperativeMatrix.td" include "lgc/RowAccumulator.td" +#ifdef LLPC_BUILD_GFX12 +include "lgc/Sparse.td" +#endif diff --git a/sharedme/xdl/include/lgc/LgcXdlTypes.h b/sharedme/xdl/include/lgc/LgcXdlTypes.h index d1b82f4af9..c358dbfc66 100644 --- a/sharedme/xdl/include/lgc/LgcXdlTypes.h +++ b/sharedme/xdl/include/lgc/LgcXdlTypes.h @@ -60,6 +60,10 @@ enum class CooperativeMatrixLayout : unsigned { AccumulatorMatrixLayout, // C/D layout on gfx11 Gfx10AccumulatorMatrixLayout, // 32bit@C/D layout on gfx10 Gfx10Accumulator16bitMatrixLayout, // 16bit@C/D layout on gfx10 +#if LLPC_BUILD_GFX12 + Gfx12BaseLayout, // Base layout on gfx12 + Gfx12SwizzledKX16Layout, // KX16 layout for denseB on gfx12. 
+#endif InvalidLayout }; @@ -82,4 +86,11 @@ enum class CooperativeMatrixArithOp : unsigned { FMod }; +#if LLPC_BUILD_GFX12 +enum class SparseCooperativeMatrixSparsityFormat : unsigned { + Sparsity2to4AMD = 0, // Format of the sparse index + InvalidSparsityFormat +}; +#endif + } // namespace lgc::xdl diff --git a/sharedme/xdl/include/xdl/util/ElementType.h b/sharedme/xdl/include/xdl/util/ElementType.h index 61e393d099..c07a17f396 100644 --- a/sharedme/xdl/include/xdl/util/ElementType.h +++ b/sharedme/xdl/include/xdl/util/ElementType.h @@ -46,6 +46,11 @@ namespace lgc::xdl { llvm::Type *getCooperativeMatrixTy(llvm_dialects::Builder &builder, CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, unsigned kSize = 16); +#if LLPC_BUILD_GFX12 +// Get the llvm type of a sparse index for the sparseCooperativeMatrix. +llvm::Type *getSparseIndexTy(llvm_dialects::Builder &builder, SparseCooperativeMatrixSparsityFormat format); +#endif + // Whether the type of a cooperative matrix is integer. bool isUnderlyingIntegerCooperativeMatrix(CooperativeMatrixElementType elemType); diff --git a/sharedme/xdl/util/ElementType.cpp b/sharedme/xdl/util/ElementType.cpp index 73a6baa27e..83925f890b 100644 --- a/sharedme/xdl/util/ElementType.cpp +++ b/sharedme/xdl/util/ElementType.cpp @@ -89,11 +89,44 @@ llvm::Type *lgc::xdl::getCooperativeMatrixTy(llvm_dialects::Builder &builder, Co if (elemType == CooperativeMatrixElementType::Int8) return llvm::FixedVectorType::get(wordTy, 4); return llvm::FixedVectorType::get(wordTy, 8); +#if LLPC_BUILD_GFX12 + case CooperativeMatrixLayout::Gfx12BaseLayout: + assert(kSize == 16); + // Total elementNumber * element_bit_width/ (waveSize * vgpr_size_perlane); + // Use wave32 as default, wave64 will have some poison values in later process. 
+ cntDwords = (16 * 16 * getBitWidthOfCooperativeMatrixElement(elemType)) / (32 * 32); + if (cntDwords > 1) + return llvm::FixedVectorType::get(wordTy, cntDwords); + return builder.getInt32Ty(); + case CooperativeMatrixLayout::Gfx12SwizzledKX16Layout: + assert(kSize >= 32); + cntDwords = (kSize * 16 * getBitWidthOfCooperativeMatrixElement(elemType)) / (32 * 32); + if (cntDwords > 1) + return llvm::FixedVectorType::get(wordTy, cntDwords); + return builder.getInt32Ty(); +#endif default: llvm_unreachable("Type is not supported!"); } } +#if LLPC_BUILD_GFX12 +// ===================================================================================================================== +// Get the LLVM type of a sparse index for the sparseCooperativeMatrix. +// +// @param format : The sparsity format that selects the sparse index type +llvm::Type *lgc::xdl::getSparseIndexTy(llvm_dialects::Builder &builder, SparseCooperativeMatrixSparsityFormat format) { + // Note: the layout currently has no influence on the type. In the long run, we should switch to genuinely opaque + // types at the LGC level, and parameterize the type using both the element type and the layout. + switch (format) { + case SparseCooperativeMatrixSparsityFormat::Sparsity2to4AMD: + return builder.getInt32Ty(); + default: + llvm_unreachable("The sparsity index type is not supported now."); + } +} +#endif + // ===================================================================================================================== // Whether the underlying type of a cooperative matrix is integer. // diff --git a/test/amber/subgroupshuffle-index-constant.amber b/test/amber/subgroupshuffle-index-constant.amber index bf48036731..0390530bb1 100644 --- a/test/amber/subgroupshuffle-index-constant.amber +++ b/test/amber/subgroupshuffle-index-constant.amber @@ -1,5 +1,28 @@ #!amber -v 1.3 -## Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
## +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### # RUN: run_amber_test.py --icd %icd %s diff --git a/test/amber/subgroupshuffle-index-uniform.amber b/test/amber/subgroupshuffle-index-uniform.amber index bd77c736c9..be1913e194 100644 --- a/test/amber/subgroupshuffle-index-uniform.amber +++ b/test/amber/subgroupshuffle-index-uniform.amber @@ -1,5 +1,28 @@ #!amber -v 1.3 -## Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
## +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### # RUN: run_amber_test.py --icd %icd %s diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index e0586b8b9a..8b4c89d65c 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -711,10 +711,21 @@ void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo dumpFile << "options.disableReadFirstLaneWorkaround = " << shaderInfo->options.disableReadFirstLaneWorkaround << "\n"; dumpFile << "options.backwardPropagateNoContract = " << shaderInfo->options.backwardPropagateNoContract << "\n"; dumpFile << "options.forwardPropagateNoContract = " << shaderInfo->options.forwardPropagateNoContract << "\n"; +#if LLPC_BUILD_GFX12 + dumpFile << "options.workgroupRoundRobin = " << shaderInfo->options.workgroupRoundRobin << "\n"; +#endif dumpFile << "options.constantBufferBindingOffset = " << shaderInfo->options.constantBufferBindingOffset << "\n"; dumpFile << "options.imageSampleDrefReturnsRgba = " << shaderInfo->options.imageSampleDrefReturnsRgba << "\n"; dumpFile << "options.disableGlPositionOpt = " << shaderInfo->options.disableGlPositionOpt << "\n"; dumpFile << "options.viewIndexFromDeviceIndex = " << shaderInfo->options.viewIndexFromDeviceIndex << "\n"; +#if LLPC_BUILD_GFX12 + for (unsigned idx = 0; idx < shaderInfo->options.cachePolicyLlc.resourceCount; idx++) { + dumpFile << "options.cachePolicyLlc = " << "0x" << std::hex << shaderInfo->options.cachePolicyLlc.noAllocs[idx] + << "," << std::dec << "\n"; + } + dumpFile << "options.resourceCount = " << shaderInfo->options.cachePolicyLlc.resourceCount << "\n"; + dumpFile << "options.temporalHintShaderControl = " << shaderInfo->options.temporalHintShaderControl << "\n"; +#endif dumpFile << "options.forceUnderflowPrevention = " << shaderInfo->options.forceUnderflowPrevention << "\n"; dumpFile <<
"options.forceMemoryBarrierScope = " << shaderInfo->options.forceMemoryBarrierScope << "\n"; dumpFile << "options.scheduleStrategy = " << shaderInfo->options.scheduleStrategy << "\n"; @@ -992,6 +1003,9 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << "options.internalRtShaders = " << options->internalRtShaders << "\n"; dumpFile << "options.forceNonUniformResourceIndexStageMask = " << options->forceNonUniformResourceIndexStageMask << "\n"; +#if LLPC_BUILD_GFX12 + dumpFile << "options.expertSchedulingMode = " << options->expertSchedulingMode << "\n"; +#endif const char *glStatePrefix = "options.glState."; dumpFile << glStatePrefix << "replaceSetWithResourceType = " << options->getGlState().replaceSetWithResourceType @@ -1012,6 +1026,11 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << glStatePrefix << "enablePointSmooth = " << options->getGlState().enablePointSmooth << "\n"; dumpFile << glStatePrefix << "enableRemapLocation = " << options->getGlState().enableRemapLocation << "\n"; dumpFile << glStatePrefix << "enableDepthCompareParam = " << options->getGlState().enableDepthCompareParam << "\n"; +#if LLPC_BUILD_GFX12 + dumpFile << "options.cacheScopePolicyControl = " << options->cacheScopePolicyControl << "\n"; + dumpFile << "options.temporalHintControl = " + << "0x" << std::hex << options->temporalHintControl << std::dec << "\n"; +#endif dumpFile << "options.enablePrimGeneratedQuery = " << options->enablePrimGeneratedQuery << "\n"; dumpFile << "options.disablePerCompFetch = " << options->disablePerCompFetch << "\n"; dumpFile << "options.optimizePointSizeWrite = " << options->optimizePointSizeWrite << "\n"; @@ -1367,6 +1386,10 @@ void PipelineDumper::dumpRayTracingStateInfo(const RayTracingPipelineBuildInfo * dumpFile << "libraryMode = " << static_cast(pipelineInfo->libraryMode) << "\n"; dumpFile << "mode = " << static_cast(pipelineInfo->mode) << "\n"; dumpFile << "cpsFlags = 
" << pipelineInfo->cpsFlags << "\n"; +#if LLPC_BUILD_GFX12 + dumpFile << "disableDynamicVgpr = " << pipelineInfo->disableDynamicVgpr << "\n"; + dumpFile << "dynamicVgprBlockSize = " << pipelineInfo->dynamicVgprBlockSize << "\n"; +#endif dumpRayTracingRtState(&pipelineInfo->rtState, dumpDir, dumpFile); dumpFile << "payloadSizeMaxInLib = " << pipelineInfo->payloadSizeMaxInLib << "\n"; dumpFile << "attributeSizeMaxInLib = " << pipelineInfo->attributeSizeMaxInLib << "\n"; @@ -1788,6 +1811,10 @@ MetroHash::Hash PipelineDumper::generateHashForRayTracingPipeline(const RayTraci hasher.Update(pipeline->indirectStageMask); hasher.Update(pipeline->mode); hasher.Update(pipeline->cpsFlags); +#if LLPC_BUILD_GFX12 + hasher.Update(pipeline->disableDynamicVgpr); + hasher.Update(pipeline->dynamicVgprBlockSize); +#endif updateHashForRtState(&pipeline->rtState, &hasher, isCacheHash); hasher.Update(pipeline->libraryMode); @@ -2060,6 +2087,9 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options hasher->Update(options->reverseThreadGroup); hasher->Update(options->internalRtShaders); hasher->Update(options->forceNonUniformResourceIndexStageMask); +#if LLPC_BUILD_GFX12 + hasher->Update(options->expertSchedulingMode); +#endif hasher->Update(options->getGlState().replaceSetWithResourceType); hasher->Update(options->getGlState().buildResourcesDataForShaderModule); hasher->Update(options->getGlState().disableTruncCoordForGather); @@ -2067,6 +2097,10 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options hasher->Update(options->getGlState().vertex64BitsAttribSingleLoc); hasher->Update(options->getGlState().enableFragColor); hasher->Update(options->getGlState().disableBaseVertex); +#if LLPC_BUILD_GFX12 + hasher->Update(options->cacheScopePolicyControl); + hasher->Update(options->temporalHintControl); +#endif hasher->Update(options->enablePrimGeneratedQuery); hasher->Update(options->getGlState().enablePolygonStipple);
hasher->Update(options->getGlState().enableLineSmooth); @@ -2176,9 +2210,19 @@ void PipelineDumper::updateHashForPipelineShaderInfo(ShaderStage stage, const Pi hasher->Update(options.constantBufferBindingOffset); hasher->Update(options.backwardPropagateNoContract); hasher->Update(options.forwardPropagateNoContract); +#if LLPC_BUILD_GFX12 + hasher->Update(options.workgroupRoundRobin); +#endif hasher->Update(options.imageSampleDrefReturnsRgba); hasher->Update(options.disableGlPositionOpt); hasher->Update(options.viewIndexFromDeviceIndex); +#if LLPC_BUILD_GFX12 + if (options.cachePolicyLlc.resourceCount > 0) { + hasher->Update(reinterpret_cast(options.cachePolicyLlc.noAllocs), + sizeof(unsigned) * options.cachePolicyLlc.resourceCount); + } + hasher->Update(options.temporalHintShaderControl); +#endif hasher->Update(options.forceUnderflowPrevention); hasher->Update(options.forceMemoryBarrierScope); hasher->Update(options.scheduleStrategy); diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index 66302452b3..40545f4ca8 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -260,11 +260,18 @@ class SectionShaderOption : public Section { SectionShaderOption() : Section(getAddrTable(), SectionTypeUnset, "options"), m_clientHash{} { memset(&m_state, 0, sizeof(m_state)); +#if LLPC_BUILD_GFX12 + m_cachePolicyLlc = &m_memory; +#endif } void getSubState(SubState &state) { m_state.clientHash.lower = m_clientHash.i64Vec2[0]; m_state.clientHash.upper = m_clientHash.i64Vec2[1]; +#if LLPC_BUILD_GFX12 + m_state.cachePolicyLlc.resourceCount = m_cachePolicyLlc->size(); + m_state.cachePolicyLlc.noAllocs = m_cachePolicyLlc->data(); +#endif state = m_state; }; SubState &getSubStateRef() { return m_state; } @@ -320,9 +327,16 @@ class SectionShaderOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, backwardPropagateNoContract, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forwardPropagateNoContract, 
MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, constantBufferBindingOffset, MemberTypeInt, false); +#if LLPC_BUILD_GFX12 + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, workgroupRoundRobin, MemberTypeBool, false); +#endif INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, imageSampleDrefReturnsRgba, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, disableGlPositionOpt, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, viewIndexFromDeviceIndex, MemberTypeBool, false); +#if LLPC_BUILD_GFX12 + INIT_MEMBER_NAME_TO_ADDR(SectionShaderOption, m_cachePolicyLlc, MemberTypeUArray, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, temporalHintShaderControl, MemberTypeInt, false); +#endif INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forceUnderflowPrevention, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forceMemoryBarrierScope, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, scheduleStrategy, MemberTypeEnum, false); @@ -333,6 +347,10 @@ class SectionShaderOption : public Section { SubState m_state; IUFValue m_clientHash; +#if LLPC_BUILD_GFX12 + std::vector *m_cachePolicyLlc; + std::vector m_memory; +#endif }; // ===================================================================================================================== @@ -591,10 +609,17 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, optimizeTessFactor, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableInterpModePatch, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, pageMigrationEnabled, MemberTypeBool, false); +#if LLPC_BUILD_GFX12 + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, expertSchedulingMode, MemberTypeBool, false); +#endif INIT_MEMBER_NAME_TO_ADDR(SectionPipelineOption, m_glState, MemberTypeGlState, true); 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enablePrimGeneratedQuery, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disablePerCompFetch, MemberTypeBool, false); +#if LLPC_BUILD_GFX12 + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, cacheScopePolicyControl, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, temporalHintControl, MemberTypeInt, false); +#endif INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, optimizePointSizeWrite, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, padBufferSizeToNextDword, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionPipelineOption, m_compileTimeConstants, MemberTypeCompileConstInfo, true); @@ -1229,6 +1254,10 @@ class SectionRayTracingState : public Section { INIT_MEMBER_NAME_TO_ADDR(SectionRayTracingState, m_clientMetadata, MemberTypeU8Array, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, cpsFlags, MemberTypeInt, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, rtIgnoreDeclaredPayloadSize, MemberTypeBool, false); +#if LLPC_BUILD_GFX12 + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, disableDynamicVgpr, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionRayTracingState, dynamicVgprBlockSize, MemberTypeInt, false); +#endif INIT_MEMBER_DYNARRAY_NAME_TO_ADDR(SectionRayTracingState, m_gpurtOptions, MemberTypeGpurtOption, true); return addrTableInitializer; }(); diff --git a/util/gpurtshim/CMakeLists.txt b/util/gpurtshim/CMakeLists.txt index 7732a6e4c7..a1992c10bf 100644 --- a/util/gpurtshim/CMakeLists.txt +++ b/util/gpurtshim/CMakeLists.txt @@ -35,5 +35,10 @@ if(LLPC_RAY_TRACING AND NOT LLPC_IS_STANDALONE) # Link against vkgc_headers to pull in the necessary include directories and all the VKI_* defines target_link_libraries(vkgc_gpurtshim PUBLIC vkgc_headers) target_link_libraries(vkgc_gpurtshim PRIVATE gpurt) +#if LLPC_BUILD_GFX12 + if(LLPC_BUILD_GFX12) + 
target_compile_definitions(vkgc_gpurtshim PUBLIC LLPC_BUILD_GFX12) + endif() +#endif endif() #endif diff --git a/util/gpurtshim/GpurtShim.cpp b/util/gpurtshim/GpurtShim.cpp index 834ce78554..a99bb2b32c 100644 --- a/util/gpurtshim/GpurtShim.cpp +++ b/util/gpurtshim/GpurtShim.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -38,6 +38,17 @@ using namespace Vkgc; RtIpVersion gpurt::getRtIpVersion(GfxIpVersion gfxIpVersion) { +#if LLPC_BUILD_GFX12 + if (gfxIpVersion.major >= 12) { +#if GPURT_BUILD_RTIP3_1 + return {3, 1}; +#elif GPURT_BUILD_RTIP3 + return {3, 0}; +#else + return {0, 0}; +#endif + } +#endif if (gfxIpVersion.major >= 11) return {2, 0}; if (gfxIpVersion >= GfxIpVersion{10, 3}) @@ -52,6 +63,10 @@ static Pal::RayTracingIpLevel getRtIpLevel(RtIpVersion rtIpVersion) { {{1, 0}, Pal::RayTracingIpLevel::RtIp1_0}, {{1, 1}, Pal::RayTracingIpLevel::RtIp1_0}, {{2, 0}, Pal::RayTracingIpLevel::RtIp2_0}, +#if LLPC_BUILD_GFX12 + {{3, 0}, Pal::RayTracingIpLevel::RtIp3_0}, + {{3, 1}, Pal::RayTracingIpLevel::RtIp3_1}, +#endif }; // clang-format on @@ -95,7 +110,15 @@ void gpurt::getFuncTable(RtIpVersion rtIpVersion, GpurtFuncTable &table) { Pal::RayTracingIpLevel rtIpLevel = getRtIpLevel(rtIpVersion); GpuRt::EntryFunctionTable gpurtTable; +#if LLPC_BUILD_GFX12 +#if GPURT_BUILD_RTIP3 + GpuRt::QueryRayTracingEntryFunctionTable(rtIpLevel, true, &gpurtTable); +#else GpuRt::QueryRayTracingEntryFunctionTable(rtIpLevel, &gpurtTable); +#endif +#else + GpuRt::QueryRayTracingEntryFunctionTable(rtIpLevel, &gpurtTable); +#endif unmangleDxilName(table.pFunc[RT_ENTRY_TRACE_RAY], 
gpurtTable.traceRay.pTraceRay); unmangleDxilName(table.pFunc[RT_ENTRY_TRACE_RAY_INLINE], gpurtTable.rayQuery.pTraceRayInline); diff --git a/version/CMakeLists.txt b/version/CMakeLists.txt index 350e7672ae..8c1a388ddc 100644 --- a/version/CMakeLists.txt +++ b/version/CMakeLists.txt @@ -90,6 +90,16 @@ llpc_set_property(llpc_version INTERFACE LLPC_BUILD_STRIX1 ON "HW_STRIX1") llpc_set_property(llpc_version INTERFACE LLPC_BUILD_STRIX_HALO ON "HW_STRIX_HALO") #endif +#if LLPC_BUILD_GFX12 +### GFX12 device support +llpc_set_property(llpc_version INTERFACE LLPC_BUILD_GFX12 ON "HW_GFX12") +# Set in parent scope so that LLVM can see it. +set(LLPC_BUILD_GFX12 "${LLPC_BUILD_GFX12}" PARENT_SCOPE) +#endif +#if LLPC_BUILD_NAVI48 +llpc_set_property(llpc_version INTERFACE LLPC_BUILD_NAVI48 ON "HW_NAVI48") +#endif + # Report the summary of what is enabled. message(STATUS "llpc_version:${LLPC_SET_PROPERTY_SUMMARY_llpc_version}") diff --git a/version/include/llpc/GpurtEnums.h b/version/include/llpc/GpurtEnums.h index ca968f56c4..fd05f20ed1 100644 --- a/version/include/llpc/GpurtEnums.h +++ b/version/include/llpc/GpurtEnums.h @@ -63,6 +63,12 @@ enum class RayTracingIpLevel : uint32_t { _None = 0, RtIp1_1 = 11, RtIp2_0 = 20, +#if LLPC_BUILD_GFX12 + RtIp3_0 = 30, +#endif +#if LLPC_BUILD_GFX12 + RtIp3_1 = 31, +#endif }; // CPS Scheduling levels. diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in index 0b64411a53..9598d7de39 100644 --- a/version/include/llpcVersion.h.in +++ b/version/include/llpcVersion.h.in @@ -42,6 +42,9 @@ // | 75.10| Add temporalHintShaderControl to PipelineShaderOptions | // | 75.9 | Add rtIgnoreDeclaredPayloadSize to RayTracingPipelineBuildInfo. | // | 75.8 | Add forceMemoryBarrierScope to PipelineShaderOptions. | +#if LLPC_BUILD_GFX12 +// | 75.7 | Add temporalHintControl and cachePolicyLlc to PipelineOptions. | +#endif // | 75.6 | Add enableRemapLocation to PipelineOptions. Add outLocationMaps to GraphicsPipelineBuildInfo. 
| // | 75.5 | Add optimizePointSizeWrite to PipelineShaderOptions in order to optimize the case PointSize = 1.0. | // | 75.4 | Add disableGlPositionOpt to PipelineShaderOptions. | @@ -72,6 +75,9 @@ // | Add columnCount to ResourceNodeData. | // | Support vertex fetch in Byte, refine vertex fetch to always fetch in Component. | // | 70.4 | Add LibraryMode and pLibrarySummaries to RayTracingPipelineBuildInfo | +#if LLPC_BUILD_GFX12 +// | 70.3 | Add disableDynamicVgpr to RayTracingPipelineBuildInfo | +#endif // | 70.2 | Add useSoftwareVertexBufferDescriptors to GraphicsPipelineBuildInfo | // | 70.1 | Add cpsFlags to RayTracingPipelineBuildInfo | // | 70.0 | Add enablePrimGeneratedQuery to PipelineOptions | @@ -87,7 +93,13 @@ // | 65.4 | Add disableSampleMask to PipelineOptions | // | 65.3 | Add originUpperLeft to GraphicsPipelineBuildInfo | // | 65.2 | Support SPIRV extended vertex attribute formats during vertex fetch module. | +#if LLPC_BUILD_GFX12 +// | 65.1 | Add cacheScopePolicyControl to PipelineOptions | +#endif // | 65.0 | Remove updateDescInElf | +#if LLPC_BUILD_GFX12 +// | 64.3 | Add workgroupRoundRobin to PipelineShaderOptions. | +#endif // | 64.2 | Add dynamicSampleInfo to GraphicsPipelineBuildInfo::rsState | // | 64.1 | Add disableTruncCoordForGather to PipelineOptions. | // | 64.0 | Add enableColorExportShader to GraphicsPipelineBuildInfo. | @@ -112,6 +124,9 @@ // | 61.2 | Add pClientMetadata and clientMetadataSize to all PipelineBuildInfos | // | 61.1 | Add IPipelineDumper::GetGraphicsShaderBinaryHash | // | 61.0 | Add DescriptorMutable type and ResourceMappingNode::strideInDwords to support mutable descriptors | +#if LLPC_BUILD_GFX12 +// | 60.1 | Add expertSchedulingMode in PipelineOptions | +#endif // | 60.0 | Simplify the enum NggCompactMode to a boolean flag | // | 59.0 | Remove the option enableVertexReuse from NggState | // | 57.2 | Move all internal resource binding id to enum InternalBinding. |