From eaf7c1d444cbf7f4ca31dfd24d213632afdae3ab Mon Sep 17 00:00:00 2001 From: qiaojbao Date: Fri, 28 Feb 2025 11:36:03 +0800 Subject: [PATCH] Update llpc from commit c3573fc3 SPIRVReader: add support of int4 in vector relevant ops llvmraytracing: Improve rematerialization heuristic lgc: Remove getTypeSizeInBytes() in InitializeWorkgroupMemory Refactor coherent/volatile [Continuations] Generate init.whole.wave in lgc Add Convergent to ImageSample instruction lgc: Don't lift readfirstlane across convergent operations [llvmraytracing] Share CPS scheduling level with GPURT Emulate v_dot2_bf16_bf16 on Gfx11 [llvmraytracing] Some cleanups Rework mesh shader attribute export cmake: Updates for standalone compiler build [lgc] Re-enable two gfx10 cooperative matrix tests Support the output of array type when autoLayoutDesc lgc: Add llvm sched strategy option [llvmraytracing] Add dummy hitattrs + padding + payload arguments to RayGen shaders Update API for new LLVM upstream [llvmraytracing] Fix issue in LowerRaytracingPipelinePass::extendArgumentStruct uniform constant is readonly memory Rework vertex exports in NGG Vulkan Global Buffer use for CPS Fix amdllpc crash when compile mesh shader Enable LLVM_OPTIMIZED_TABLEGEN for multi-config generators Fix to make LLPC_BUILD_ vars globally visible Prepare tests for entry_point metadata change Add subgroupShuffle amber tests [llvmraytracing] Add support for _AmdTraceRayMakePC [lgcdis] add function to disassemble a single symbol from ELF vkgc: Add missing RtIpVersion comparison operator [llvmraytracing] Move 'Do not autogenerate' hint [llvmraytracing] Don't pass local variables into visitor payload Fix build error for ShaderDbg Fix bug in lgc CreateSubgroupClusteredReduction Update update_llpc_test_checks.py compilerutils: handle GlobalVariables compilerutils: Add helpers that work in gdb [llvmraytracing] Move lowering of AmdGetShaderKind [lgcdis] add error recovery support Fix GFX11 GDS synchronization issue lgc: Fix compiler 
warning [lgcdis] remove unnecessary output when disassembling a single symbol Store mesh outputs to allocas if possible Cmake: LLPC_ENABLE_WERROR tidy-up [llvmraytracing] Test update Fix YCBCrSampler Add native llvm-dialects-tblgen for cross-compiling Fix tessellation I/O with read-back-only outputs Clear mesh outputComponents if outputLocInfoMap is empty Calculate the PM4 CRC with the generated SP3 Fix deprecation compiler warnings [llvmraytracing] Move BB label helpers to CompilerUtils [llvmraytracing] Rename `CompilerUtils` namespace Add Strix Halo support --- CMakeLists.txt | 43 +- cmake/CompilerFlags.cmake | 14 +- cmake/CompilerStandalone.cmake | 74 +- cmake/DialectsTablegen.cmake | 39 + cmake/FindShaderDbg.cmake | 49 + cmake/llpc_version.cmake | 2 +- cmake/llpcdeps.cmake | 38 + cmake/llvm.cmake | 39 +- cmake/sharedme/xdl.cmake | 34 + .../include/compilerutils/ArgPromotion.h | 6 +- .../include/compilerutils/CompilerUtils.h | 26 +- .../include/compilerutils/DxilToLlvm.h | 6 +- .../include/compilerutils/DxilUtils.h | 6 +- .../compilerutils/IRSerializationUtils.h | 8 +- .../compilerutils/LoweringPointerTupleMap.h | 6 +- .../include/compilerutils/TypeLowering.h | 15 +- .../compilerutils/ValueOriginTracking.h | 6 +- .../compilerutils/ValueSpecialization.h | 6 +- compilerutils/include/public/.clang-format | 1 + .../public/AmdExtD3DShaderIntrinsics.h | 614 +++ compilerutils/lib/ArgPromotion.cpp | 10 +- compilerutils/lib/CompilerUtils.cpp | 102 +- compilerutils/lib/DxilToLlvm.cpp | 46 +- compilerutils/lib/IRSerializationUtils.cpp | 12 +- compilerutils/lib/TypeLowering.cpp | 37 +- compilerutils/lib/ValueOriginTracking.cpp | 13 +- .../lib/ValueOriginTrackingTestPass.cpp | 8 +- .../lib/ValueOriginTrackingTestPass.h | 6 +- compilerutils/lib/ValueSpecialization.cpp | 8 +- .../lib/ValueSpecializationTestPass.cpp | 8 +- .../lib/ValueSpecializationTestPass.h | 6 +- compilerutils/plugin/Plugin.cpp | 4 +- .../inc/link-constant-expr-global.ll | 26 + .../inc/link-control-flow.ll
| 26 + .../inc/link-existing-func-name.ll | 26 + .../inc/link-existing-global-name.ll | 26 + .../inc/link-fold-const.ll | 26 + .../inc/link-func-metadata.ll | 26 + .../inc/link-global-initializer.ll | 26 + .../inc/link-global-same-struct.ll | 26 + .../inc/link-global-same-type.ll | 26 + .../cross-module-inliner/inc/link-simple.ll | 26 + .../inc/link-struct-ptr-argument.ll | 26 + .../link-constant-expr-global.ll | 26 + .../cross-module-inliner/link-control-flow.ll | 26 + .../link-existing-func-name.ll | 26 + .../link-existing-global-multi.ll | 26 + .../link-existing-global-name.ll | 26 + .../cross-module-inliner/link-fold-const.ll | 26 + .../link-func-metadata.ll | 26 + .../link-global-initializer.ll | 26 + .../link-global-same-struct.ll | 26 + .../link-global-same-type.ll | 26 + .../test/cross-module-inliner/link-simple.ll | 26 + .../link-struct-ptr-argument.ll | 26 + compilerutils/test/lit.cfg.py | 25 + compilerutils/test/lit.site.cfg.py.in | 25 + .../test/value-origin-tracking/assumptions.ll | 26 + .../test/value-origin-tracking/basic-tests.ll | 26 + .../test/value-origin-tracking/freeze-mode.ll | 26 + .../value-origin-tracking/max-value-size.ll | 26 + .../test/value-origin-tracking/slice-sizes.ll | 26 + .../test/value-origin-tracking/vector.ll | 26 + .../value-specialization/specialization.ll | 26 + .../cross-module-inline.cpp | 4 +- docs/DdnDebugPrintf.md | 86 +- imported/llvm-dialects | 2 +- include/vkgcBase.h | 4 +- include/vkgcDefs.h | 12 + lgc/CMakeLists.txt | 18 +- lgc/builder/BuilderBase.cpp | 4 +- lgc/builder/BuilderImpl.cpp | 35 +- lgc/builder/BuilderRecorder.cpp | 4 +- lgc/builder/BuilderRecorder.h | 2 +- lgc/builder/BuilderReplayer.cpp | 11 +- lgc/builder/ImageBuilder.cpp | 9 +- lgc/builder/MatrixBuilder.cpp | 96 +- lgc/builder/SubgroupBuilder.cpp | 43 +- lgc/disassembler/CMakeLists.txt | 4 +- lgc/disassembler/Disassembler.cpp | 315 +- lgc/include/lgc/builder/BuilderImpl.h | 6 +- .../lgc/lowering/AddBufferOperationMetadata.h | 8 +- 
.../lgc/lowering/CombineCooperativeMatrix.h | 4 +- .../lgc/lowering/LowerBufferOperations.h | 4 +- .../lgc/lowering/LowerCooperativeMatrix.h | 126 +- lgc/include/lgc/lowering/LowerDebugPrintf.h | 3 + lgc/include/lgc/lowering/LowerGpuRt.h | 2 + lgc/include/lgc/lowering/MutateEntryPoint.h | 9 +- lgc/include/lgc/state/IntrinsDefs.h | 1 + lgc/include/lgc/util/AddressExtender.h | 12 +- lgc/include/lgc/util/BufferResource.h | 44 + lgc/interface/lgc/Builder.h | 1 + lgc/interface/lgc/BuilderCommon.h | 23 +- lgc/interface/lgc/Disassembler.h | 16 +- lgc/interface/lgc/LgcDialect.h | 56 +- lgc/interface/lgc/LgcDialect.td | 475 +-- lgc/interface/lgc/Pipeline.h | 21 +- lgc/lowering/AddLoopMetadata.cpp | 2 +- lgc/lowering/CollectResourceUsage.cpp | 212 +- lgc/lowering/CombineCooperativeMatrix.cpp | 19 +- lgc/lowering/Continufy.cpp | 31 +- lgc/lowering/FragmentColorExport.cpp | 3 +- lgc/lowering/LgcLowering.cpp | 4 +- lgc/lowering/LowerBufferOperations.cpp | 99 +- lgc/lowering/LowerCooperativeMatrix.cpp | 68 +- lgc/lowering/LowerDebugPrintf.cpp | 49 +- lgc/lowering/LowerGpuRt.cpp | 39 + lgc/lowering/LowerInOut.cpp | 44 +- lgc/lowering/LowerPopsInterlock.cpp | 86 +- lgc/lowering/LowerReadFirstLane.cpp | 13 +- lgc/lowering/MeshTaskShader.cpp | 1219 ++++-- lgc/lowering/MeshTaskShader.h | 60 +- lgc/lowering/MutateEntryPoint.cpp | 134 +- lgc/lowering/NggPrimShader.cpp | 2322 ++++++----- lgc/lowering/NggPrimShader.h | 65 +- lgc/lowering/PassRegistry.inc | 2 +- lgc/lowering/PreparePipelineAbi.cpp | 253 +- lgc/lowering/RegisterMetadataBuilder.cpp | 12 +- lgc/lowering/SetupTargetFeatures.cpp | 13 +- lgc/lowering/ShaderMerger.cpp | 12 +- lgc/lowering/StructurizeBuffers.cpp | 2 +- lgc/state/PipelineState.cpp | 14 +- lgc/state/TargetInfo.cpp | 19 +- lgc/test/BuiltIns/cs-deviceindex.lgc | 26 + lgc/test/BuiltIns/cs-globalinvocationid.lgc | 26 + lgc/test/BuiltIns/cs-localinvocationid.lgc | 26 + lgc/test/BuiltIns/cs-localinvocationindex.lgc | 26 + lgc/test/BuiltIns/cs-numsubgroups.lgc | 26 + 
lgc/test/BuiltIns/cs-numworkgroups.lgc | 30 +- lgc/test/BuiltIns/cs-subgroupid.lgc | 26 + .../BuiltIns/cs-subgrouplocalinvocationid.lgc | 26 + lgc/test/BuiltIns/cs-subgroupsize.lgc | 26 + lgc/test/BuiltIns/cs-workgroupid.lgc | 30 +- lgc/test/BuiltIns/cs-workgroupsize.lgc | 26 + lgc/test/CMakeLists.txt | 8 +- lgc/test/CallLibFromCs-indirect.lgc | 26 + lgc/test/CallLibFromCs.lgc | 26 + lgc/test/CallLibFromCsPayload.lgc | 26 + lgc/test/CleanUndefOutputValues.lgc | 26 + lgc/test/ComputeLibraryPushConstantSpill.lgc | 26 + lgc/test/CsBPermuteWave64.lgc | 26 + lgc/test/CsComputeLibrary.lgc | 26 + lgc/test/CsComputeLibraryPayload.lgc | 26 + lgc/test/CsLowerDebugPrintf.lgc | 26 + lgc/test/CsReconfigWorkgroup.lgc | 26 + lgc/test/ElfRelocationAndNote.lgc | 26 + lgc/test/ElfRelocationSize.lgc | 26 + lgc/test/FDot2Gfx1010.lgc | 26 + lgc/test/ImageSampleNoReturn.lgc | 26 + lgc/test/InOutPackingNonZeroBase.lgc | 26 + lgc/test/IntToPtrWithAdd.lgc | 26 + lgc/test/MulDx9Zero.lgc | 26 + lgc/test/NggInPassthroughMode.lgc | 26 + lgc/test/PartPipeline.lgc | 26 + lgc/test/PatchInvalidImageDescriptor.lgc | 26 + lgc/test/PeepholeOptPhiWithIdenticalLoad.lgc | 26 + lgc/test/PhiWithArgument.lgc | 26 + .../ScalarizeInputWithDynamicIndexUser.lgc | 26 + lgc/test/ShaderStages.lgc | 26 + lgc/test/SubgroupClusteredReduction.lgc | 26 + lgc/test/TaskShaderEntryArgs.lgc | 26 + lgc/test/TaskShaderOps.lgc | 26 + lgc/test/TaskShaderRegConfig.lgc | 30 +- lgc/test/TestWaterfallLoopForStruct.lgc | 26 + lgc/test/TextureRange.lgc | 25 + .../CombineCooperativeMatrix/constants.lgc | 76 +- .../CombineCooperativeMatrix/matmul-loop.lgc | 62 +- .../packed-accumulators.lgc | 818 ++-- .../CombineCooperativeMatrix/simple.lgc | 96 +- .../unhandled-inout.lgc | 302 +- .../CpsLowering/bad-max-argument-vgprs.lgc | 70 + .../CpsLowering/continuation-basic.lgc | 184 +- .../CpsLowering/cps-entry-point.lgc | 50 +- .../CpsLowering/cps-stack-lowering.lgc | 709 ++-- .../CpsLowering/cps-unify-exits-no-iww.lgc | 383 ++ 
.../CpsLowering/cps-unify-exits.lgc | 436 +- .../missing-max-argument-vgprs.lgc | 68 + .../InvariantStartUserWithPhiNode.lgc | 26 + .../LowerBufferOperations/buffer-index-op.lgc | 26 + .../buffer.atomic.ops.lgc | 26 + .../LowerBufferOperations/simple.lgc | 26 + .../strided-buffer-ops.lgc | 110 +- .../LowerBufferOperations/uniform-phi.lgc | 26 + .../LowerCooperativeMatrix/convert.lgc | 58 +- .../LowerCooperativeMatrix/extract-insert.lgc | 40 +- .../LowerCooperativeMatrix/gfx1010muladd.lgc | 51 +- .../LowerCooperativeMatrix/gfx1011muladd.lgc | 52 +- .../LowerCooperativeMatrix/gfx1100muladd.lgc | 38 +- .../LowerCooperativeMatrix/load-wave64.lgc | 46 +- .../packed-accumulators-wave64.lgc | 48 +- .../LowerCooperativeMatrix/store-wave64.lgc | 40 +- .../Transforms/LowerDebugPrintf/basic.lgc | 26 + .../LowerGpuRt/init-static-id-op.lgc | 26 + .../LowerGpuRt/lower-gpurt-dialect-op.lgc | 61 + .../PeepholeOptLog2PowUnderflow.lgc | 26 + .../ReadFirstLane/PatchReadLane.lgc | 26 + .../Transforms/ReadFirstLane/issue2746.lgc | 26 + lgc/test/Transforms/ReadFirstLane/simple.lgc | 26 + lgc/test/UberFetchShader.lgc | 26 + lgc/test/WorkgroupIdOpt.lgc | 26 + lgc/test/lgc-tool.lgc | 26 + lgc/test/lgcdis-longjump.lgc | 26 + lgc/test/lgcdis-table_jump.lgc | 26 + lgc/test/lgcdis.lgc | 26 + lgc/test/lit.cfg.py | 25 + lgc/test/lit.site.cfg.py.in | 37 + .../scalarizationOfDescriptorLoadsTest1.lgc | 26 + .../scalarizationOfDescriptorLoadsTest10.lgc | 26 + .../scalarizationOfDescriptorLoadsTest11.lgc | 26 + .../scalarizationOfDescriptorLoadsTest12.lgc | 26 + .../scalarizationOfDescriptorLoadsTest13.lgc | 26 + .../scalarizationOfDescriptorLoadsTest14.lgc | 26 + .../scalarizationOfDescriptorLoadsTest15.lgc | 26 + .../scalarizationOfDescriptorLoadsTest16.lgc | 26 + .../scalarizationOfDescriptorLoadsTest2.lgc | 26 + .../scalarizationOfDescriptorLoadsTest3.lgc | 26 + .../scalarizationOfDescriptorLoadsTest4.lgc | 26 + .../scalarizationOfDescriptorLoadsTest5.lgc | 26 + 
.../scalarizationOfDescriptorLoadsTest6.lgc | 26 + .../scalarizationOfDescriptorLoadsTest7.lgc | 26 + .../scalarizationOfDescriptorLoadsTest8.lgc | 26 + .../scalarizationOfDescriptorLoadsTest9.lgc | 26 + .../gfx1150_ds_subdword_workaround.lgc | 26 + lgc/test/tanh.lgc | 26 + lgc/tool/lgc/CMakeLists.txt | 4 +- lgc/tool/lgc/lgc.cpp | 5 +- lgc/tool/lgcdis/CMakeLists.txt | 4 +- lgc/tool/lgcdis/lgcdis.cpp | 6 +- lgc/unittests/lit.site.cfg.py.in | 25 + lgc/util/AddressExtender.cpp | 8 +- lgc/util/BufferResource.cpp | 73 + llpc/CMakeLists.txt | 35 +- llpc/context/llpcCompiler.cpp | 10 + llpc/context/llpcContext.cpp | 9 +- llpc/context/llpcPipelineContext.cpp | 14 +- llpc/lowering/LinkTransformShaders.cpp | 2 +- llpc/lowering/LowerAdvancedBlend.cpp | 2 +- llpc/lowering/LowerCooperativeMatrix.cpp | 11 +- llpc/lowering/LowerGlCompatibility.cpp | 99 +- llpc/lowering/LowerGlCompatibility.h | 4 +- llpc/lowering/LowerGlobals.cpp | 7 +- llpc/lowering/LowerMath.cpp | 14 +- llpc/lowering/LowerMath.h | 1 - llpc/lowering/LowerMemoryOp.cpp | 2 +- llpc/lowering/LowerRayTracing.cpp | 4 +- llpc/lowering/Lowering.cpp | 1 - llpc/lowering/PrepareContinuations.cpp | 15 +- .../lowering/PrepareTransformVertexShader.cpp | 4 +- llpc/lowering/ProcessGfxRuntimeLibrary.cpp | 2 +- llpc/lowering/ProcessGpuRtLibrary.cpp | 27 +- llpc/lowering/ProcessGpuRtLibrary.h | 1 + llpc/test/lit.site.cfg.py.in | 27 +- .../bugs/ArrayOfVariablePointers.spvasm | 8 + .../bugs/PipelineCs_SpillThresholdEnable.pipe | 8 + .../core/FMA_TestOperandIsZero.spvasm | 8 + ...Array_Load_With_Array_Load_Result_lit.frag | 7 + .../core/OOB_Check_Dependent_Load_lit.frag | 7 + .../core/OOB_Check_Load_Array_Loop_lit.frag | 7 + .../core/OOB_Check_Load_Array_lit.frag | 7 + .../OOB_Check_Load_Array_with_Struct_lit.frag | 7 + .../OOB_Check_Load_Matrix_Vector_lit.frag | 7 + .../core/OOB_Check_Load_Matrix_lit.frag | 7 + .../OOB_Check_Load_Nested_Struct_lit.frag | 7 + .../core/OOB_Check_Load_Struct_lit.frag | 7 + 
.../core/OOB_Check_Load_Vector_lit.frag | 7 + .../core/OOB_Check_Multiple_Load_lit.frag | 7 + .../core/OOB_Check_Optimization_lit.frag | 7 + .../core/OOB_Check_Store_Array_lit.frag | 7 + .../core/OOB_Check_Store_Struct_lit.frag | 7 + ...onUniform_TestTexutreLoadStoreInt64.spvasm | 8 + ...ccessChain_TestBlockVectorExtract_lit.frag | 7 + ...Chain_TestGeneralVarVectorExtract_lit.frag | 7 + ...ccessChain_TestInOutVectorExtract_lit.frag | 7 + ...AccessChain_TestMultiLevelChain_lit.spvasm | 8 + ..._TestOutBlockMemberLocUnspecified_lit.vert | 7 + ...in_TestRowMajorBlockVectorExtract_lit.frag | 7 + ...essChain_TestUniformVectorExtract_lit.frag | 7 + .../core/OpAll_TestBoolConst_lit.frag | 7 + .../shaderdb/core/OpAll_TestBvec4_lit.frag | 7 + .../core/OpAny_TestBoolConst_lit.frag | 7 + .../shaderdb/core/OpAny_TestBvec2_lit.frag | 7 + .../core/OpArrayLength_TestGeneral_lit.frag | 7 + ...OpAtomicAnd_TestInt64ImageAtomicAnd.spvasm | 8 + ...change_TestInt64ImageAtomicCompSwap.spvasm | 8 + ...icCompareExchange_TestStrongCompare.spvasm | 8 + ...change_TestInt64ImageAtomicExchange.spvasm | 8 + ...pAtomicIAdd_TestInt64ImageAtomicAdd.spvasm | 8 + ...ement_TestInt64ImageAtomicDecrement.spvasm | 8 + ...omicIDecrement_TestStorageBlock_lit.spvasm | 8 + ...ement_TestInt64ImageAtomicIncrement.spvasm | 8 + ...omicIIncrement_TestStorageBlock_lit.spvasm | 8 + ...cIIncrement_TestVariablePointer_lit.spvasm | 8 + ...pAtomicISub_TestInt64ImageAtomicSub.spvasm | 8 + ...AtomicLoad_TestInt64ImageAtomicLoad.spvasm | 8 + .../OpAtomicLoad_TestStorageBlock_lit.spvasm | 8 + .../OpAtomicOr_TestInt64ImageAtomicOr.spvasm | 8 + ...pAtomicSMax_TestInt64ImageAtomicMax.spvasm | 8 + ...pAtomicSMin_TestInt64ImageAtomicMin.spvasm | 8 + ...omicStore_TestInt64ImageAtomicStore.spvasm | 8 + .../OpAtomicStore_TestStorageBlock_lit.spvasm | 8 + ...pAtomicUMax_TestInt64ImageAtomicMax.spvasm | 8 + ...pAtomicUMin_TestInt64ImageAtomicMin.spvasm | 8 + .../OpAtomicXXX_TestImageDimension_lit.comp | 25 + 
...tomicXXX_TestImageMemoryQualifier_lit.comp | 25 + .../core/OpAtomicXXX_TestImage_lit.comp | 25 + .../core/OpAtomicXXX_TestImage_lit.frag | 7 + .../OpAtomicXXX_TestSharedVariable_lit.comp | 25 + ...stStorageBlockAndSharedWithData64_lit.comp | 25 + .../OpAtomicXXX_TestStorageBlock_lit.frag | 7 + ...OpAtomicXor_TestInt64ImageAtomicXor.spvasm | 8 + .../core/OpBitCount_TestIntConst_lit.frag | 7 + .../shaderdb/core/OpBitCount_TestInt_lit.frag | 7 + .../core/OpBitCount_TestIvec4_lit.frag | 7 + .../core/OpBitCount_TestUint_lit.frag | 7 + .../OpBitFieldInsert_TestIntConst_lit.frag | 7 + .../core/OpBitFieldInsert_TestInt_lit.frag | 7 + .../core/OpBitFieldInsert_TestIvec4_lit.frag | 7 + .../core/OpBitFieldInsert_TestUint_lit.frag | 7 + .../OpBitFieldSExtract_TestGeneral_lit.frag | 7 + .../OpBitFieldSExtract_TestIntConst_lit.frag | 7 + .../OpBitFieldUExtract_TestGeneral_lit.frag | 7 + .../core/OpBitFieldUExtract_TestUint_lit.frag | 7 + .../core/OpBitReverse_TestIntConst_lit.frag | 7 + .../core/OpBitReverse_TestInt_lit.frag | 7 + .../core/OpBitReverse_TestUint_lit.frag | 7 + .../core/OpBitcast_TestIvec3ToUvec3_lit.frag | 7 + .../core/OpBitcast_TestUintToInt_lit.frag | 7 + .../core/OpBitwiseAnd_TestUvec3_lit.frag | 7 + .../core/OpBitwiseOr_TestUvec3_lit.frag | 7 + .../core/OpBitwiseXor_TestUvec3_lit.frag | 7 + .../OpBranchConditional_TestBreakInLoop.frag | 7 + ...BranchConditional_TestComplexContinue.frag | 7 + ...pBranchConditional_TestContinueInLoop.frag | 7 + .../core/OpBranchConditional_TestDoWhile.frag | 7 + .../core/OpBranchConditional_TestIf.frag | 7 + .../core/OpBranchConditional_TestIfElse.frag | 7 + .../core/OpBranchConditional_TestLoop.frag | 7 + ...pBranchConditional_TestLoopContinue.spvasm | 8 + .../OpBranchConditional_TestLoopNoBody.spvasm | 8 + .../OpBranchConditional_TestSuccessiveIf.frag | 7 + .../core/OpBranch_TestEarlyReturn.frag | 7 + .../OpBranch_TestUnreachableContinue.frag | 7 + .../OpBranch_TestUnreachableSwitch.spvasm | 8 + 
.../OpBranch_TestUnsequentialBlock.spvasm | 8 + ...CompositeConstruct_TestArrayConstruct.frag | 7 + ...ompositeConstruct_TestMatrixConstruct.frag | 7 + ...ompositeConstruct_TestStructConstruct.frag | 7 + ...CompositeConstruct_TestVecConstruct.spvasm | 8 + ...ompositeConstruct_TestVectorConstruct.frag | 7 + ...OpCompositeConstruct_TestVectorMatrix.frag | 7 + .../OpCompositeExtract_TestArrayExtract.frag | 7 + .../OpCompositeExtract_TestVectorExtract.frag | 7 + ...iteInsert_TestScalarInsertedToArray.spvasm | 8 + ...teInsert_TestScalarInsertedToMatrix.spvasm | 8 + .../OpCompositeInsert_TestVectorInsert.frag | 7 + .../OpConstantComposite_TestVectorMatrix.frag | 7 + .../core/OpConstantNull_TestScalar.spvasm | 8 + .../core/OpConstantNull_TestStruct.spvasm | 8 + .../OpConstantNull_TestVecMatArray.spvasm | 8 + ...pConstantNull_TestVectorMatrixArray.spvasm | 8 + .../OpControlBarrier_TestGeneral_lit.comp | 25 + .../OpConvertFToS_TestDoubleToInt_lit.frag | 7 + .../OpConvertFToS_TestVec4ToIvec4_lit.frag | 7 + .../OpConvertFToU_TestDvec3ToUvec3_lit.frag | 7 + .../OpConvertFToU_TestVec2ToUvec2_lit.frag | 7 + .../OpConvertSToF_TestIvec2ToVec2_lit.frag | 7 + .../OpConvertSToF_TestIvec4ToDvec4_lit.frag | 7 + .../OpConvertUToF_TestUvec3ToDvec3_lit.frag | 7 + .../OpConvertUToF_TestUvec3ToVec3_lit.frag | 7 + .../core/OpCopyLogical_TestGeneral_lit.comp | 25 + ...opyMemory_TestCopyLocalToOutput_lit.spvasm | 8 + ...yMemory_TestCopyUniformToOutput_lit.spvasm | 8 + ...opyMemory_TestExtraMemoryAccess_lit.spvasm | 8 + .../core/OpCopyMemory_TestStruct_lit.spvasm | 8 + .../core/OpCopyObject_TestNonUniform.spvasm | 8 + .../core/OpCopyObject_TestVec4_lit.spvasm | 8 + .../core/OpDPdx_TestFineCoarse_lit.frag | 7 + .../core/OpDPdy_TestFineCoarse_lit.frag | 7 + ...onGroup_TestGroupAndGroupMember_lit.spvasm | 8 + .../shaderdb/core/OpDot_TestDvec_lit.frag | 7 + .../shaderdb/core/OpDot_TestFloat_lit.frag | 7 + .../test/shaderdb/core/OpDot_TestVec_lit.frag | 7 + 
.../OpEmitStreamVertex_TestGeneral_lit.geom | 2 + .../core/OpEmitVertex_TestGeneral_lit.geom | 2 + ...ryPoint_TesListAllGlobalVariables_lit.frag | 7 + .../OpExecutionModeId_TestLocalSizeId.spvasm | 8 + .../OpExtInst_NMinNMaxNaNFlags_lit.spvasm | 8 + .../shaderdb/core/OpExtInst_PackHalf2x16.comp | 25 + .../shaderdb/core/OpFAdd_TestMatrix_lit.frag | 7 + .../shaderdb/core/OpFAdd_TestVector_lit.frag | 7 + .../core/OpFConvert_TestDmat4ToMat4_lit.frag | 7 + .../OpFConvert_TestDoubleToFloat_lit.frag | 7 + .../OpFConvert_TestMat2X3ToDmat2X3_lit.frag | 7 + .../OpFConvert_TestRoundingModeRTN_lit.spvasm | 8 + .../OpFConvert_TestRoundingModeRTP_lit.spvasm | 8 + .../core/OpFConvert_TestVec3ToDvec3_lit.frag | 7 + .../shaderdb/core/OpFDiv_TestVector_lit.frag | 7 + .../shaderdb/core/OpFMod_TestDvec4_lit.frag | 7 + .../shaderdb/core/OpFMod_TestFloat_lit.frag | 7 + .../shaderdb/core/OpFMod_TestVec4_lit.frag | 7 + .../shaderdb/core/OpFMul_TestMatrix_lit.frag | 7 + .../core/OpFMul_TestOperandIsZero.spvasm | 8 + .../shaderdb/core/OpFMul_TestVector_lit.frag | 7 + .../core/OpFNegate_TestDvec3_lit.frag | 7 + .../core/OpFNegate_TestMat2X3_lit.frag | 7 + .../shaderdb/core/OpFNegate_TestVec3_lit.frag | 7 + .../core/OpFOrdEqual_TestVec3_lit.frag | 7 + .../OpFOrdGreaterThanEqual_TestFloat_lit.frag | 7 + .../OpFOrdGreaterThanEqual_TestVec3_lit.frag | 7 + .../core/OpFOrdGreaterThan_TestFloat_lit.frag | 7 + .../core/OpFOrdGreaterThan_TestVec3_lit.frag | 7 + .../OpFOrdLessThanEqual_TestFloat_lit.frag | 7 + .../OpFOrdLessThanEqual_TestVec3_lit.frag | 7 + .../core/OpFOrdLessThan_TestFloat_lit.frag | 7 + .../core/OpFOrdLessThan_TestVec3_lit.frag | 7 + .../core/OpFOrdNotEqual_TestVec3_lit.frag | 7 + .../core/OpFOrdSLessThan_TestFloat_lit.frag | 7 + .../OpFOrdULessThanEqual_TestFloat_lit.frag | 7 + .../core/OpFOrdULessThan_TestFloat_lit.frag | 7 + .../shaderdb/core/OpFSub_TestMatrix_lit.frag | 7 + .../shaderdb/core/OpFSub_TestVector_lit.frag | 7 + .../OpFunctionCall_TestArguTexArray_lit.frag | 7 + 
...OpFunctionCall_TestManyParameters_lit.frag | 7 + ...ionCall_TestNumericReturnAndInout_lit.frag | 7 + .../OpFunctionCall_TestParamConst_lit.frag | 7 + ...OpFunctionCall_TestParamSimpleTex_lit.frag | 7 + .../OpFunctionCall_TestParamTexArray_lit.frag | 7 + ...nctionCall_TestParamTexNestedCall_lit.frag | 7 + ...nctionCall_TestVoidReturnAndInout_lit.frag | 7 + .../core/OpFunction_TestDontInline.spvasm | 8 + ...OpFunction_TestInlineDontInline_lit.spvasm | 8 + .../core/OpFwidth_TestFineCoarse_lit.frag | 7 + ...pGroupNonUniformBroadcast_ToShuffle.spvasm | 8 + ...roupNonUniformBroadcast_ToWaterfall.spvasm | 8 + .../shaderdb/core/OpGroupNonUniformMax.comp | 25 + .../core/OpGroupNonUniformQuadSwap.comp | 25 + .../core/OpIAddCarry_TestGeneral_lit.frag | 7 + .../core/OpIAddCarry_TestInt_lit.frag | 7 + .../core/OpIAddCarry_TestUvec4_lit.frag | 7 + .../shaderdb/core/OpIAdd_TestVector_lit.frag | 7 + .../shaderdb/core/OpIEqual_TestIvec2_lit.frag | 7 + .../shaderdb/core/OpIMul_TestVector_lit.frag | 7 + .../core/OpINotEqual_TestIvec2_lit.frag | 7 + .../core/OpINotEqual_TestSignMatch_lit.vert | 7 + .../OpINotEqual_TestSignedUnsigned_lit.frag | 7 + .../core/OpISubBorrow_TestGeneral_lit.frag | 7 + .../core/OpISubBorrow_TestInt_lit.frag | 7 + .../shaderdb/core/OpISub_TestVector_lit.frag | 7 + .../core/OpImageDrefGather_TestBasic_lit.frag | 7 + .../OpImageDrefGather_TestOffset_lit.frag | 7 + ...refGather_TestTextureGatherOffset_lit.frag | 7 + ...efGather_TestTextureGatherOffsets_lit.frag | 7 + ...ImageDrefGather_TestTextureGather_lit.frag | 7 + ...mageExplicitLod_TestDrefLodOffset_lit.frag | 7 + ..._Test2DMSArray_disableShadowTable_lit.frag | 7 + .../core/OpImageFetch_Test2DMSArray_lit.frag | 7 + .../core/OpImageFetch_Test2DMS_lit.frag | 7 + .../core/OpImageFetch_TestBasic_lit.frag | 7 + .../core/OpImageFetch_TestBuffer_lit.comp | 25 + .../OpImageFetch_TestDynamicOffset.spvasm | 8 + .../OpImageFetch_TestIntegerSampler_lit.frag | 7 + .../core/OpImageFetch_TestOffset_lit.frag | 7 + 
...OpImageFetch_TestTexelFetchOffset_lit.frag | 7 + .../core/OpImageFetch_TestTexelFetch_lit.frag | 7 + .../core/OpImageGather_TestBasic_lit.frag | 7 + .../OpImageGather_TestConstOffsets_lit.frag | 7 + ...pImageGather_TestDrefConstOffsets_lit.frag | 7 + .../OpImageGather_TestIntegerSampler.frag | 7 + .../core/OpImageGather_TestOffset_lit.frag | 7 + ...geGather_TestTextureGatherBiasLod_lit.frag | 7 + ...ageGather_TestTextureGatherOffset_lit.frag | 7 + ...geGather_TestTextureGatherOffsets_lit.frag | 7 + .../OpImageGather_TestTextureGather_lit.frag | 7 + .../OpImageQueryLevels_TestBasic_lit.comp | 25 + ...ueryLevels_TestTextureQueryLevels_lit.frag | 7 + .../core/OpImageQueryLod_TestBasic_lit.frag | 7 + ...ImageQueryLod_TestTextureQueryLod_lit.frag | 7 + .../OpImageQuerySamples_TestBasic_lit.comp | 25 + ...mageQuerySamples_TestImageSamples_lit.frag | 7 + ...geQuerySamples_TestTextureSamples_lit.frag | 7 + ...ImageQuerySizeLod_TestTextureSize_lit.frag | 7 + .../core/OpImageQuerySize_TestBasic_lit.frag | 7 + .../OpImageQuerySize_TestImageSize_lit.frag | 7 + .../core/OpImageQuerySize_TestImage_lit.comp | 25 + .../OpImageQuerySize_TestSeparated_lit.frag | 7 + .../OpImageQuerySize_TestTextureSize_lit.frag | 7 + ...geReadWrite_TestImageLoadStoreLod_lit.comp | 25 + .../core/OpImageRead_Test2DMS_lit.comp | 25 + .../core/OpImageRead_TestBasic_lit.comp | 25 + .../core/OpImageRead_TestBuffer_lit.comp | 25 + .../core/OpImageRead_TestCube_lit.comp | 25 + .../core/OpImageRead_TestImageLoad_lit.frag | 7 + .../OpImageRead_TestInt64ImageLoad.spvasm | 8 + .../core/OpImageRead_TestIntImage_lit.comp | 25 + .../OpImageRead_TestMemoryQualifier_lit.comp | 25 + .../OpImageRead_TestNonVec4Data_lit.spvasm | 8 + .../OpImageRead_TestSubpassInput_lit.frag | 7 + ...mageSampleDrefExplicitLod_TestDrefLod.frag | 7 + ...SampleDrefExplicitLod_TestTextureGrad.frag | 7 + ...eDrefExplicitLod_TestTextureGradClamp.frag | 7 + ...DrefExplicitLod_TestTextureGradOffset.frag | 7 + 
...eSampleDrefExplicitLod_TestTextureLod.frag | 7 + ...eDrefExplicitLod_TestTextureLodOffset.frag | 7 + ...OpImageSampleDrefImplicitLod_TestDref.frag | 7 + ...ageSampleDrefImplicitLod_TestDrefBias.frag | 7 + ...efImplicitLod_TestImageWithoutDepth.spvasm | 8 + ...mageSampleDrefImplicitLod_TestTexture.frag | 7 + ...eDrefImplicitLod_TestTextureBiasClamp.frag | 7 + ...ampleDrefImplicitLod_TestTextureClamp.frag | 7 + ...eDrefImplicitLod_TestTextureGradClamp.frag | 7 + ...mplicitLod_TestTextureGradOffsetClamp.frag | 7 + ...mpleDrefImplicitLod_TestTextureOffset.frag | 7 + ...refImplicitLod_TestTextureOffsetClamp.frag | 7 + .../OpImageSampleExplicitLod_TestLod_lit.frag | 7 + ...eExplicitLod_TestTextureGradClamp_lit.frag | 7 + ...ExplicitLod_TestTextureGradOffset_lit.frag | 7 + ...SampleExplicitLod_TestTextureGrad_lit.frag | 7 + ...eExplicitLod_TestTextureLodOffset_lit.frag | 7 + ...eSampleExplicitLod_TestTextureLod_lit.frag | 7 + ...mageSampleImplicitLod_Test1DArray_lit.frag | 7 + .../OpImageSampleImplicitLod_Test1D_lit.frag | 7 + ...mageSampleImplicitLod_Test2DArray_lit.frag | 7 + ...ImageSampleImplicitLod_Test2DRect_lit.frag | 7 + .../OpImageSampleImplicitLod_Test3D_lit.frag | 7 + ...ImplicitLod_TestArrayDirectAccess_lit.frag | 7 + ...pImageSampleImplicitLod_TestBasic_lit.frag | 7 + ...OpImageSampleImplicitLod_TestBias_lit.frag | 7 + ...geSampleImplicitLod_TestCubeArray_lit.frag | 7 + ...eSampleImplicitLod_TestCubeShadow_lit.frag | 7 + ...OpImageSampleImplicitLod_TestCube_lit.frag | 7 + ...ageSampleImplicitLod_TestDrefGrad_lit.frag | 7 + ...OpImageSampleImplicitLod_TestGrad_lit.frag | 7 + ...pleImplicitLod_TestIntegerSampler_lit.frag | 7 + ...Lod_TestMultiDimArrayDirectAccess_lit.frag | 7 + ...ImageSampleImplicitLod_TestOffset_lit.frag | 7 + ...mplicitLod_TestProjDrefGradOffset_lit.frag | 7 + ...ageSampleImplicitLod_TestSeparate_lit.frag | 7 + ...licitLod_TestSignExtendZeroExtend_lit.frag | 7 + ...eImplicitLod_TestTextureBiasClamp_lit.frag | 7 + 
...ampleImplicitLod_TestTextureClamp_lit.frag | 7 + ...eImplicitLod_TestTextureGradClamp_lit.frag | 7 + ...citLod_TestTextureGradOffsetClamp_lit.frag | 7 + ...mplicitLod_TestTextureOffsetClamp_lit.frag | 7 + ...mpleImplicitLod_TestTextureOffset_lit.frag | 7 + ...mageSampleImplicitLod_TestTexture_lit.frag | 7 + ...leProjDrefExplicitLod_TestProjDrefLod.frag | 7 + ...ojDrefExplicitLod_TestTextureProjGrad.frag | 7 + ...ExplicitLod_TestTextureProjGradOffset.frag | 7 + ...rojDrefExplicitLod_TestTextureProjLod.frag | 7 + ...fExplicitLod_TestTextureProjLodOffset.frag | 7 + ...ampleProjDrefImplicitLod_TestProjComp.frag | 7 + ...ampleProjDrefImplicitLod_TestProjDref.frag | 7 + ...leProjDrefImplicitLod_TestTextureProj.frag | 7 + ...pImageSampleProjDref_TestProjDrefBias.frag | 7 + ...mageSampleProjExplicitLod_TestProjLod.frag | 7 + ...leProjExplicitLod_TestTextureProjGrad.frag | 7 + ...ExplicitLod_TestTextureProjGradOffset.frag | 7 + ...pleProjExplicitLod_TestTextureProjLod.frag | 7 + ...jExplicitLod_TestTextureProjLodOffset.frag | 7 + ...OpImageSampleProjImplicitLod_TestProj.frag | 7 + ...ageSampleProjImplicitLod_TestProjBias.frag | 7 + ...SampleProjImplicitLod_TestTextureProj.frag | 7 + ...ProjImplicitLod_TestTextureProjOffset.frag | 7 + ...xplicitLod_TestTextureInNonFragShader.vert | 7 + .../OpImageSample_TestSeparateSampler.pipe | 8 + ...OpImageSample_TestSeparateSampler_lit.frag | 7 + ...rseDrefGather_TestSparseTextureGather.frag | 7 + ...eSparseGather_TestSparseTextureGather.frag | 7 + ...Gather_TestSparseTextureGatherBiasLod.frag | 7 + ...SparseRead_TestInt64SparseImageLoad.spvasm | 8 + ...OpImageSparseRead_TestSparseImageLoad.frag | 7 + ...xplicitLod_TestSparseTextureGradClamp.frag | 7 + ...eDrefExplicitLod_TestSparseTextureLod.frag | 7 + ...mpleDrefImplicitLod_TestSparseTexture.frag | 7 + ...mplicitLod_TestSparseTextureBiasClamp.frag | 7 + ...refImplicitLod_TestSparseTextureClamp.frag | 7 + ...mplicitLod_TestSparseTextureGradClamp.frag | 7 + 
...tLod_TestSparseTextureGradOffsetClamp.frag | 7 + ...licitLod_TestSparseTextureOffsetClamp.frag | 7 + ...mpleExplicitLod_TestSparseTextureGrad.frag | 7 + ...xplicitLod_TestSparseTextureGradClamp.frag | 7 + ...ampleExplicitLod_TestSparseTextureLod.frag | 7 + ...seSampleImplicitLod_TestSparseTexture.frag | 7 + ...mplicitLod_TestSparseTextureBiasClamp.frag | 7 + ...pleImplicitLod_TestSparseTextureClamp.frag | 7 + ...mplicitLod_TestSparseTextureGradClamp.frag | 7 + ...tLod_TestSparseTextureGradOffsetClamp.frag | 7 + ...licitLod_TestSparseTextureOffsetClamp.frag | 7 + ...ImageSparseTexelsResident_TestGeneral.frag | 7 + .../shaderdb/core/OpImageWrite_Test2DMS.comp | 25 + .../shaderdb/core/OpImageWrite_TestBasic.comp | 25 + .../core/OpImageWrite_TestBuffer.comp | 25 + .../OpImageWrite_TestBufferNonVec4Data.spvasm | 8 + .../shaderdb/core/OpImageWrite_TestCube.comp | 25 + .../OpImageWrite_TestDifferentFormat.comp | 25 + .../core/OpImageWrite_TestImageStore.frag | 7 + .../OpImageWrite_TestInt64ImageStore.spvasm | 8 + .../core/OpImageWrite_TestIntImage.comp | 25 + .../OpImageWrite_TestIntImage_Aliased.spvasm | 8 + .../OpImageWrite_TestMemoryQualifier.comp | 25 + .../core/OpImageWrite_TestNonVec4Data.spvasm | 8 + .../shaderdb/core/OpIsInf_TestDouble_lit.frag | 7 + .../shaderdb/core/OpIsInf_TestFloat_lit.frag | 7 + .../shaderdb/core/OpIsNan_TestDvec2_lit.frag | 7 + .../shaderdb/core/OpIsNan_TestFloat_lit.frag | 7 + .../shaderdb/core/OpIsNan_TestVec4_lit.frag | 7 + .../core/OpKill_TestFunctionBranch_lit.spvasm | 8 + .../core/OpKill_TestFunctionDynamic_lit.frag | 7 + .../OpKill_TestFunctionInlineReturn_lit.frag | 7 + .../core/OpKill_TestFunctionInline_lit.frag | 7 + .../OpKill_TestFunctionUnreachable_lit.spvasm | 8 + .../shaderdb/core/OpKill_TestGeneral_lit.frag | 7 + .../shaderdb/core/OpLine_TestGeneral.spvasm | 8 + .../core/OpLoad_TestAggregate_lit.frag | 7 + .../shaderdb/core/OpLoad_TestMatrix_lit.frag | 7 + .../shaderdb/core/OpLogicalAnd_TestBvec4.frag | 7 + 
.../core/OpLogicalEqual_TestGeneral.frag | 7 + .../OpLogicalNotEqual_TestGeneral_lit.frag | 7 + .../shaderdb/core/OpLogicalNot_TestBasic.frag | 7 + .../core/OpLogicalNot_TestBool_lit.frag | 7 + .../shaderdb/core/OpLogicalOr_TestBvec2.frag | 7 + .../OpLoopMerge_TestDependencyLength.spvasm | 8 + .../core/OpLoopMerge_TestDontUnroll.spvasm | 8 + ...OpLoopMerge_TestIterationControls_lit.frag | 7 + .../core/OpLoopMerge_TestPartialCount.spvasm | 8 + ...MatrixTimesMatrix_TestDmat2xDmat2_lit.frag | 7 + ...ixTimesMatrix_TestDmat4X3xDmat3X4_lit.frag | 7 + ...trixTimesMatrix_TestMat2X3xMat4X3_lit.frag | 7 + ...OpMatrixTimesMatrix_TestMat2xMat2_lit.frag | 7 + ...OpMatrixTimesMatrix_TestMat3xMat3_lit.frag | 7 + ...OpMatrixTimesMatrix_TestMat4xMat4_lit.frag | 7 + ...atrixTimesScalar_TestDmat3xDouble_lit.frag | 7 + ...atrixTimesScalar_TestDoublexDmat4_bit.frag | 7 + ...atrixTimesScalar_TestMat3X4xFloat_lit.frag | 7 + ...TimesScalar_TestMat4X2xConstFloat_lit.frag | 7 + ...trixTimesVector_TestDmat2X3xDvec2_lit.frag | 7 + ...MatrixTimesVector_TestDmat2xDvec2_lit.frag | 7 + ...trixTimesVector_TestDmat4X2xDvec4_lit.frag | 7 + ...MatrixTimesVector_TestMat3X4xVec4_lit.frag | 7 + ...OpMatrixTimesVector_TestMat3xVec3_lit.frag | 7 + ...oryBarrier_TestGroupMemoryBarrier_lit.comp | 25 + ...ryBarrier_TestMemoryBarrierBuffer_lit.frag | 7 + ...oryBarrier_TestMemoryBarrierImage_lit.frag | 7 + ...ryBarrier_TestMemoryBarrierShared_lit.comp | 25 + ...OpMemoryBarrier_TestMemoryBarrier_lit.comp | 25 + .../core/OpModuleProcessed_TestGeneral.spvasm | 8 + .../shaderdb/core/OpNoLine_TestGeneral.spvasm | 8 + .../shaderdb/core/OpNop_TestGeneral.spvasm | 8 + .../shaderdb/core/OpNot_TestUint_lit.frag | 7 + .../OpOuterProduct_TestDvec3xDvec2_lit.frag | 7 + .../OpOuterProduct_TestVec2xVec2_lit.frag | 7 + .../OpOuterProduct_TestVec2xVec4_lit.frag | 7 + .../OpOuterProduct_TestVec3xVec2_lit.frag | 7 + .../OpOuterProduct_TestVec3xVec4_lit.frag | 7 + .../core/OpPhi_Switch_FunctionCall_Phi.spvasm | 8 + 
.../OpPhi_TestMultiIncomingFromSwitch.spvasm | 8 + .../core/OpPhi_TestPhiInSelfLoop.spvasm | 8 + .../shaderdb/core/OpPtrDiff_Buffer_mem.spvasm | 8 + .../OpPtrDiff_TestVariablePointers.spvasm | 8 + .../core/OpPtrDiff_Workgroup_mem.spvasm | 8 + llpc/test/shaderdb/core/OpPtrEqualTest.spvasm | 8 + .../core/OpPtrEqual_TestNullPointerCmp.spvasm | 8 + .../OpPtrNotEqual_TestWorkgroupCmp.spvasm | 8 + .../OpQuantizeToF16_TestGeneral_lit.spvasm | 8 + .../OpReturnValue_TestEarlyReturn_lit.frag | 7 + ...eturnValue_TestReturnInNestedLoop_lit.frag | 7 + .../shaderdb/core/OpSDiv_TestIvec2_lit.frag | 7 + .../core/OpSDotAccSat_TestIVec.spvasm | 8 + .../core/OpSDotAccSat_TestIVec16bit.spvasm | 8 + .../test/shaderdb/core/OpSDot_TestIVec.spvasm | 8 + ...erThanEqual_TestSignedAndUnsigned_lit.frag | 7 + ...GreaterThan_TestSignedAndUnsigned_lit.frag | 7 + ...ssThanEqual_TestSignedAndUnsigned_lit.frag | 7 + ...OpSLessThan_TestSignedAndUnsigned_lit.frag | 7 + .../shaderdb/core/OpSMod_TestInt_lit.frag | 7 + .../shaderdb/core/OpSMod_TestIvec2_lit.frag | 7 + .../core/OpSMulExtended_TestGeneral_lit.frag | 7 + .../core/OpSMulExtended_TestInt_lit.frag | 7 + .../shaderdb/core/OpSNegate_TestInt_lit.frag | 7 + .../core/OpSNegate_TestUvec2_lit.frag | 7 + .../core/OpSUDotAccSat_TestIUVec.spvasm | 8 + .../shaderdb/core/OpSUDot_TestSIVec.spvasm | 8 + .../core/OpSelect_TestDescriptorArray.spvasm | 8 + .../core/OpSelect_TestGeneral_lit.frag | 7 + ...OpSelect_TestSelectBetweenObjects_lit.frag | 7 + .../core/OpSelect_TestSharedVariable.spvasm | 8 + ...pSelectionMerge_TestDontFlatten_lit.spvasm | 8 + .../OpSelectionMerge_TestFlatten_lit.spvasm | 8 + .../OpShiftLeftLogical_TestIvec2_lit.frag | 7 + .../OpShiftLeftLogical_TestUvec3_lit.frag | 7 + .../OpShiftRightArithmetic_TestIvec4_lit.frag | 7 + .../OpShiftRightLogical_TestUvec3_lit.frag | 7 + .../shaderdb/core/OpShiftXXX_TestInt_lit.frag | 7 + .../core/OpShiftXXX_TestUInt_lit.frag | 7 + .../core/OpShift_Testi16shift64_lit.spvasm | 8 + 
.../core/OpShift_Testi32shift64_lit.spvasm | 8 + .../core/OpShift_Testi64shift16_lit.spvasm | 8 + .../OpSourceContinued_TestNormalString.spvasm | 8 + .../core/OpSource_TestSourceString.spvasm | 8 + .../core/OpSource_TestUnknownLang.spvasm | 8 + ...OpSpecConstantOp_TestArithLogicOp_lit.frag | 7 + ...ecConstantOp_TestCompositeExtract_lit.frag | 7 + ...cConstantOp_TestCompositeInsert_lit.spvasm | 8 + ...pecConstantOp_TestNestedSpecConstOp.spvasm | 8 + ...onstantOp_TestQuantizeFlushToZero_lit.pipe | 8 + ...pecConstantOp_TestQuantizeToF16_lit.spvasm | 8 + .../OpSpecConstantOp_TestVectorRelated.spvasm | 8 + .../OpSpecConstantOp_TestVectorShuffle.frag | 7 + .../OpSpecConstant_TestWorkGroupSize_lit.comp | 25 + .../shaderdb/core/OpStore_TestMatrix_lit.frag | 7 + .../OpSwitch_Test64BitCaseLabel_lit.spvasm | 8 + .../core/OpSwitch_TestFallThrough_lit.frag | 7 + .../core/OpSwitch_TestGeneral_lit.frag | 7 + .../core/OpSwitch_TestMergedBranches_lit.frag | 7 + .../core/OpTranspose_TestDmat2X3_lit.frag | 7 + .../core/OpTranspose_TestMat2X3_lit.frag | 7 + .../core/OpTranspose_TestMat3X4_lit.frag | 7 + .../core/OpTranspose_TestMat4_lit.frag | 7 + ...peSampledImage_TestWaterfallInsertion.frag | 7 + ...peSampledImage_TestWaterfallScalarize.frag | 7 + ...Image_TestWaterfallScalarizeVgprLimit.frag | 7 + ...age_TestWaterfallScalarize_MultiBlock.frag | 7 + ...age_TestWaterfallScalarize_SharedDesc.frag | 7 + llpc/test/shaderdb/core/OpUDiv_TestUvec3.frag | 7 + .../shaderdb/core/OpUDiv_TestUvec3_lit.frag | 7 + .../core/OpUDotAccSat_TestUVec.spvasm | 8 + .../core/OpUDotAccSat_TestUVec16bit.spvasm | 8 + .../test/shaderdb/core/OpUDot_TestUVec.spvasm | 8 + llpc/test/shaderdb/core/OpUMod_TestUInt.frag | 7 + .../shaderdb/core/OpUMod_TestUInt_lit.frag | 7 + .../shaderdb/core/OpUMod_TestUintConst.frag | 7 + .../core/OpUMod_TestUintConst_lit.frag | 7 + llpc/test/shaderdb/core/OpUMod_TestUvec3.frag | 7 + .../shaderdb/core/OpUMod_TestUvec3_lit.frag | 7 + .../core/OpUMulExtended_TestUint.frag | 7 + 
.../core/OpUMulExtended_TestUint_lit.frag | 7 + .../core/OpUmulExtended_TestGeneral.frag | 7 + .../core/OpUmulExtended_TestGeneral_lit.frag | 7 + .../core/OpUndef_TestRuntimeArray.spvasm | 8 + .../core/OpUndef_TestScalarArray.spvasm | 8 + .../core/OpUndef_TestUndefImage.spvasm | 8 + .../core/OpUndef_TestUndefImage_lit.spvasm | 8 + .../core/OpUnreachable_TestGeneral.spvasm | 8 + .../core/OpUnreachable_TestGeneral_lit.spvasm | 8 + .../core/OpVariable_TestInitializer.spvasm | 8 + .../OpVariable_TestInitializer_lit.spvasm | 8 + .../OpVectorExtractDynamic_TestDvec3.frag | 7 + .../OpVectorExtractDynamic_TestDvec3_lit.frag | 7 + ...pVectorExtractDynamic_TestUintIndex.spvasm | 8 + ...torExtractDynamic_TestUintIndex_lit.spvasm | 8 + .../core/OpVectorInsertDynamic_TestDvec2.frag | 7 + .../OpVectorInsertDynamic_TestDvec2_lit.frag | 7 + .../core/OpVectorInsertDynamic_TestVec4.frag | 7 + .../OpVectorInsertDynamic_TestVec4_lit.frag | 7 + ...ffle_TestDifferentInputVecSizes_lit.spvasm | 8 + .../core/OpVectorShuffle_TestDvec.frag | 7 + ...ectorShuffle_TestDvec4UndefVariable.spvasm | 8 + ...rShuffle_TestDvec4UndefVariable_lit.spvasm | 8 + ...Shuffle_TestDvec4UnspecifiedChannel.spvasm | 8 + ...fle_TestDvec4UnspecifiedChannel_lit.spvasm | 8 + .../core/OpVectorShuffle_TestDvec_lit.frag | 7 + .../core/OpVectorShuffle_TestVec.frag | 7 + ...rShuffle_TestVec4UnspecifiedChannel.spvasm | 8 + ...ffle_TestVec4UnspecifiedChannel_lit.spvasm | 8 + .../core/OpVectorShuffle_TestVec_lit.frag | 11 +- ...OpVectorTimesMatrix_TestDvec2xDmat4X2.frag | 7 + ...ctorTimesMatrix_TestDvec2xDmat4X2_lit.frag | 7 + ...OpVectorTimesMatrix_TestDvec3xDmat2X3.frag | 7 + ...ctorTimesMatrix_TestDvec3xDmat2X3_lit.frag | 7 + ...OpVectorTimesMatrix_TestDvec3xDmat4X3.frag | 7 + ...ctorTimesMatrix_TestDvec3xDmat4X3_lit.frag | 7 + .../OpVectorTimesMatrix_TestVec2xMat3X2.frag | 7 + ...VectorTimesMatrix_TestVec2xMat3X2_lit.frag | 7 + .../OpVectorTimesMatrix_TestVec4xMat4.frag | 7 + ...OpVectorTimesMatrix_TestVec4xMat4_lit.frag 
| 7 + .../OpVectorTimesScalar_TestDoublexDvec4.frag | 7 + ...ectorTimesScalar_TestDoublexDvec4_lit.frag | 7 + .../OpVectorTimesScalar_TestDvec4xDouble.frag | 7 + ...ectorTimesScalar_TestDvec4xDouble_lit.frag | 7 + .../OpVectorTimesScalar_TestIvec2xInt.frag | 7 + ...OpVectorTimesScalar_TestIvec2xInt_lit.frag | 7 + .../OpVectorTimesScalar_TestUvec4xUint.frag | 7 + ...pVectorTimesScalar_TestUvec4xUint_lit.frag | 7 + ...VectorTimesScalar_TestVec3xConstFloat.frag | 7 + ...orTimesScalar_TestVec3xConstFloat_lit.frag | 7 + .../OverrideThreadGroupSize16X16X1.spvasm | 8 + .../core/OverrideThreadGroupSize8X8X1.spvasm | 8 + .../test/shaderdb/core/ShaderRetInLoop.spvasm | 8 + .../TestEnableImplicitInvariantExports.vert | 7 + .../TestForceNonUniformResourceIndex.frag | 7 + .../TestNoContractBackwardPropagation.spvasm | 8 + .../TestNoContractForwardPropagation.spvasm | 8 + .../shaderdb/core/TestReverseThreadGroup.comp | 25 + .../shaderdb/core/TestThreadGroupSwizzle.comp | 25 + .../shaderdb/core/TestXfbStateMetadata.vert | 7 + .../shaderdb/debug_info/FunctionCall.pipe | 8 + .../debug_info/NonSemanticShaderDebug.pipe | 8 + ...PipelineGsTess_TestVsTesGsMergeShader.pipe | 53 +- .../PipelineGs_TestVsGSMergeShader.pipe | 8 + .../DebugInfo_DebugCompilationUnit.spvasm | 8 + .../avoid/DebugInfo_DebugDeclare.spvasm | 8 + .../avoid/DebugInfo_DebugExpression.spvasm | 8 + .../DebugInfo_DebugFunctionDeclaration.spvasm | 8 + .../avoid/DebugInfo_DebugLexicalBlock.spvasm | 8 + .../avoid/DebugInfo_DebugSourceNoText.spvasm | 8 + .../avoid/DebugInfo_DebugTypeArray.spvasm | 8 + .../avoid/DebugInfo_DebugTypeEnum.spvasm | 8 + .../avoid/DebugInfo_DebugTypeFunction.spvasm | 8 + .../DebugInfo_DebugTypeInheritance.spvasm | 8 + .../avoid/DebugInfo_DebugTypePointer.spvasm | 8 + .../avoid/DebugInfo_DebugTypeQualifier.spvasm | 8 + .../avoid/DebugInfo_DebugTypeVector.spvasm | 8 + .../avoid/DebugInfo_DebugTypedef.spvasm | 8 + .../avoid/DebugInfo_TestFsBasic.frag | 7 + .../avoid/DebugInfo_TestVsBasic.vert | 7 + 
.../GlslBadEntryPointName.frag | 7 + .../error_reporting/GlslDuplicateStage.frag | 7 + .../error_reporting/InvalidGfxip.frag | 7 + .../error_reporting/LlvmMissingShaderStage.ll | 26 + .../LlvmVerificationFailure.ll | 26 + .../MultipleThreadsVerboseOutput.spvasm | 8 + .../error_reporting/SpirvBadEntryPoint.spvasm | 8 + .../SpirvDuplicateStage.spvasm | 8 + .../error_reporting/SpirvInvalidOpcode.spvasm | 8 + .../SpirvMissingEntryPoint.spvasm | 8 + .../SpirvValidationFailure.spvasm | 8 + .../SpirvWildcardAndEntryPoint.spvasm | 8 + .../error_reporting/UnsupportedMCPUOption.ll | 26 + .../Ext16bitStorage_TestFpRoundMode.spvasm | 8 + .../Ext16bitStorage_TestFsInput_lit.frag | 7 + .../Ext16bitStorage_TestGsInput_lit.geom | 2 + .../Ext16bitStorage_TestGsOutput_lit.geom | 2 + .../Ext16bitStorage_TestTcsInput_lit.tesc | 2 + .../Ext16bitStorage_TestTcsOutput_lit.tesc | 2 + .../Ext16bitStorage_TestTesInput_lit.tese | 2 + .../Ext16bitStorage_TestTesOutput_lit.tese | 2 + .../Ext16bitStorage_TestVsInput_lit.vert | 7 + .../Ext16bitStorage_TestVsOutput_lit.vert | 7 + ...ExtBufferReference_TestPointerCasting.frag | 7 + .../ExtDemoteToHelper_TestDemote.frag | 7 + ...DemoteToHelper_TestIsHelperInvocation.frag | 7 + .../ExtDeviceGroup_TestComputeShader_lit.comp | 25 + ...ExtDeviceGroup_TestGraphicsShader_lit.vert | 7 + ...xtExplicitVertexParam_TestBuiltIn_lit.frag | 7 + ...xplicitVertexParam_TestInterpFunc_lit.frag | 7 + .../ExtFragMask_TestFragFetch_lit.frag | 7 + .../ExtGcnShader_TestBuiltInFunc_lit.frag | 7 + .../ExtGoogleHlslFunc_TestGeneral.spvasm | 8 + .../ExtMultiView_TestSubpassLoad_lit.pipe | 8 + ...ExtShaderBallot_TestArithmeticAMD_lit.frag | 7 + ...tShaderBallot_TestArithmeticData16AMD.frag | 7 + .../ExtShaderBallot_TestGeneral_lit.frag | 7 + .../ExtShaderBallot_TestMiscAMD_lit.frag | 7 + .../ExtShaderBallot_TestSwizzleAMD_lit.frag | 7 + ...ExtShaderFloat16Fetch_TestFetchData16.frag | 7 + ...xtShaderFloat16Fetch_TestGatherData16.frag | 7 + 
...tShaderFloat16Fetch_TestImagingData16.frag | 7 + ...ShaderFloat16Fetch_TestSamplingData16.frag | 7 + ...loat16Fetch_TestSubpassSamplingData16.frag | 7 + ...tShaderFloat16_TestAngleTrigFuncs_lit.frag | 7 + ...ExtShaderFloat16_TestArithmeticOp_lit.frag | 7 + .../ExtShaderFloat16_TestCommonFuncs_lit.frag | 7 + .../ExtShaderFloat16_TestDerivFuncs_lit.frag | 7 + ...haderFloat16_TestExponentialFuncs_lit.frag | 7 + ...xtShaderFloat16_TestGeometryFuncs_lit.frag | 7 + .../ExtShaderFloat16_TestInterpFuncs_lit.frag | 7 + .../ExtShaderFloat16_TestMatrixFuncs_lit.frag | 7 + ...ShaderFloat16_TestPackUnpackFuncs_lit.frag | 7 + ...ShaderFloat16_TestRelationalFuncs_lit.frag | 7 + .../ExtShaderFloat16_TestSpecConst.frag | 7 + .../ExtShaderFloat16_TestStorageBlock.comp | 25 + ...loat16_TestStorageBlockRowMajorMatrix.comp | 25 + ...tShaderFloat16_TestTrinaryMinMaxFuncs.frag | 7 + .../ExtShaderFloat16_TestVectorMatrixOp.frag | 7 + .../ExtShaderInt16_TestBasicArithInt16.frag | 7 + .../ExtShaderInt16_TestBasicArithUint16.frag | 7 + .../ExtShaderInt16_TestBitwiseOp.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncAbs.comp | 25 + ...ExtShaderInt16_TestBuiltInFuncBitConv.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncFrexp.frag | 7 + ...haderInt16_TestBuiltInFuncMinMaxClamp.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncMix.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncPack.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncSign.comp | 25 + .../ExtShaderInt16_TestBuiltInFuncUnpack.comp | 25 + ...tShaderInt16_TestBuiltInFuncVectorCmp.comp | 25 + .../extensions/ExtShaderInt16_TestNegate.comp | 25 + .../ExtShaderInt16_TestScalarCmp.comp | 25 + .../ExtShaderInt16_TestShiftOp.comp | 25 + .../ExtShaderInt16_TestSpecConst.frag | 7 + ...ExtShaderInt16_TestStorageBlockAccess.comp | 25 + ...ExtShaderInt16_TestTrinaryMinMaxFuncs.frag | 7 + .../ExtShaderInt16_TestTypeConvFromBool.comp | 25 + .../ExtShaderInt16_TestTypeConvFromFloat.comp | 25 + .../ExtShaderInt16_TestTypeConvFromInt.comp | 25 + 
.../ExtShaderInt16_TestTypeConvToBool.comp | 25 + .../ExtShaderInt16_TestTypeConvToFloat.comp | 25 + .../ExtShaderInt16_TestTypeConvToInt.comp | 25 + ...ExtShaderInt16_TestUniformBlockAccess.comp | 25 + .../ExtShaderInt64_TestArithmeticOp_lit.frag | 7 + .../ExtShaderInt64_TestBitwiseOp_lit.frag | 7 + .../ExtShaderInt64_TestBuiltInFunc_lit.frag | 7 + .../ExtShaderInt64_TestRelationalOp_lit.frag | 7 + .../ExtShaderInt64_TestShiftOp_lit.frag | 7 + .../ExtShaderInt64_TestTypeCast_lit.frag | 7 + .../ExtShaderInt8_TestBasicArithInt8.frag | 7 + .../ExtShaderInt8_TestBasicArithUint8.frag | 7 + .../ExtShaderInt8_TestBitwiseOp.comp | 25 + .../ExtShaderInt8_TestBufLoadStore.comp | 25 + .../ExtShaderInt8_TestBuiltInFuncAbs.comp | 25 + ...ShaderInt8_TestBuiltInFuncMinMaxClamp.comp | 25 + .../ExtShaderInt8_TestBuiltInFuncSign.comp | 25 + ...xtShaderInt8_TestBuiltInFuncVectorCmp.comp | 25 + .../extensions/ExtShaderInt8_TestFsInOut.frag | 7 + .../extensions/ExtShaderInt8_TestGsInOut.geom | 2 + .../extensions/ExtShaderInt8_TestNegate.comp | 25 + .../ExtShaderInt8_TestScalarCmp.comp | 25 + ...ShaderInt8_TestSharedVarLoadStore_lit.comp | 25 + .../ExtShaderInt8_TestShiftOp_lit.comp | 25 + .../ExtShaderInt8_TestSpecConst_lit.comp | 25 + .../ExtShaderInt8_TestTcsInOut.tesc | 2 + .../ExtShaderInt8_TestTesInOut.tese | 2 + .../ExtShaderInt8_TestTypeConvert_lit.comp | 25 + .../ExtShaderInt8_TestVsInOut_lit.vert | 9 +- .../ExtShaderVote_TestGeneral_lit.frag | 7 + ...ubgroupQuad_TestSubgroupQuadBroadcast.frag | 7 + ...roupQuad_TestSubgroupQuadSwapDiagonal.frag | 7 + ...roupQuad_TestSubgroupQuadSwapVertical.frag | 7 + .../ExtTrinaryMinMax_TestGeneral_lit.frag | 7 + .../ExtXfb_TessGsDoubleOutput_lit.geom | 2 + .../ExtXfb_TestGsFloatOutput_lit.geom | 2 + .../ExtXfb_TestNoXfbExecutionMode.spvasm | 8 + .../ExtXfb_TestTesDoubleOutput_lit.tese | 2 + .../ExtXfb_TestTesFloatOutput_lit.tese | 2 + .../ExtXfb_TestVsDoubleOutput_lit.vert | 7 + .../ExtXfb_TestVsFloatOutput_lit.vert | 7 + 
...ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag | 7 + .../extensions/OpExtInst_TestAbsDouble.frag | 7 + .../extensions/OpExtInst_TestAbsFloat.frag | 7 + .../extensions/OpExtInst_TestAbsInt.frag | 7 + .../extensions/OpExtInst_TestAbsIvec4.frag | 7 + .../extensions/OpExtInst_TestAbsVec4.frag | 7 + .../extensions/OpExtInst_TestAcos.frag | 7 + .../extensions/OpExtInst_TestAcosFloat.frag | 7 + .../extensions/OpExtInst_TestAcosh.frag | 7 + .../extensions/OpExtInst_TestAcoshFloat.frag | 7 + .../extensions/OpExtInst_TestAsin.frag | 7 + .../extensions/OpExtInst_TestAsinFloat.frag | 7 + .../extensions/OpExtInst_TestAsinh.frag | 7 + .../extensions/OpExtInst_TestAsinhFloat.frag | 7 + .../extensions/OpExtInst_TestAtan.frag | 7 + .../extensions/OpExtInst_TestAtan2.frag | 7 + .../extensions/OpExtInst_TestAtan2Float.frag | 7 + .../extensions/OpExtInst_TestAtanFloat.frag | 7 + .../extensions/OpExtInst_TestAtanh.frag | 7 + .../extensions/OpExtInst_TestAtanhFloat.frag | 7 + .../extensions/OpExtInst_TestCeilDouble.frag | 7 + .../extensions/OpExtInst_TestCeilFloat.frag | 7 + .../OpExtInst_TestCeilVec4Const.frag | 7 + .../extensions/OpExtInst_TestClampBasic.frag | 7 + .../extensions/OpExtInst_TestClampDouble.frag | 7 + .../extensions/OpExtInst_TestClampFloat.frag | 7 + .../extensions/OpExtInst_TestClampInt.frag | 7 + .../extensions/OpExtInst_TestClampUint.frag | 7 + .../extensions/OpExtInst_TestCos.frag | 7 + .../OpExtInst_TestCosVec4Const.frag | 7 + .../extensions/OpExtInst_TestCosh.frag | 7 + .../extensions/OpExtInst_TestCoshFloat.frag | 7 + .../extensions/OpExtInst_TestCrossDouble.frag | 7 + .../extensions/OpExtInst_TestCrossFloat.frag | 7 + .../extensions/OpExtInst_TestCrossVec4.frag | 7 + .../extensions/OpExtInst_TestDegrees.frag | 7 + .../OpExtInst_TestDegreesVec4Const.frag | 7 + .../OpExtInst_TestDeterminantDmat.frag | 7 + .../OpExtInst_TestDeterminantMat.frag | 7 + .../OpExtInst_TestDeterminantMat2.frag | 7 + .../OpExtInst_TestDeterminantMat4.frag | 7 + 
.../OpExtInst_TestDistanceBasic.frag | 7 + .../OpExtInst_TestDistanceDouble.frag | 7 + .../OpExtInst_TestDistanceFloat.frag | 7 + .../OpExtInst_TestDistanceVec4.frag | 7 + .../extensions/OpExtInst_TestExp.frag | 7 + .../extensions/OpExtInst_TestExp2.frag | 7 + .../OpExtInst_TestExp2Vec4Const.frag | 7 + .../OpExtInst_TestExpVec4Const.frag | 7 + .../extensions/OpExtInst_TestFaceForward.frag | 7 + .../OpExtInst_TestFaceForwardDouble.frag | 7 + .../OpExtInst_TestFaceForwardVec4.frag | 7 + .../extensions/OpExtInst_TestFindILsbInt.frag | 7 + .../OpExtInst_TestFindILsbUint.frag | 7 + .../extensions/OpExtInst_TestFindLsbInt.frag | 7 + .../extensions/OpExtInst_TestFindMsbInt.frag | 7 + .../extensions/OpExtInst_TestFindMsbUint.frag | 7 + .../extensions/OpExtInst_TestFindSMsb.frag | 7 + .../extensions/OpExtInst_TestFindUMsb.frag | 7 + .../OpExtInst_TestFloatBitsToInt_lit.frag | 7 + .../OpExtInst_TestFloatBitsToUint_lit.frag | 7 + .../OpExtInst_TestFloorDouble_lit.frag | 7 + .../OpExtInst_TestFloorFloat_lit.frag | 7 + .../OpExtInst_TestFloorVec4Const_lit.frag | 7 + .../OpExtInst_TestFmaDouble_lit.frag | 7 + .../OpExtInst_TestFmaFloat_lit.frag | 7 + .../OpExtInst_TestFmaVec4Const_lit.frag | 7 + .../OpExtInst_TestFractDouble_lit.frag | 7 + .../OpExtInst_TestFractFloat_lit.frag | 7 + .../OpExtInst_TestFractVec4Const-lit.frag | 7 + .../OpExtInst_TestFrexpDouble_lit.frag | 7 + .../OpExtInst_TestFrexpFloat_lit.frag | 7 + .../OpExtInst_TestFrexpStructDouble_lit.frag | 7 + .../OpExtInst_TestFrexpStructFloat_lit.frag | 7 + .../OpExtInst_TestFrexpStructVec4_lit.frag | 7 + .../OpExtInst_TestIntBitsToFloat_lit.frag | 7 + ..._TestInterpolateAtCentroidNoPersp_lit.frag | 7 + ...ExtInst_TestInterpolateAtCentroid_lit.frag | 7 + ...OpExtInst_TestInterpolateAtOffset_lit.frag | 7 + ...OpExtInst_TestInterpolateAtSample_lit.frag | 7 + ...pExtInst_TestInterpolateDynIdx1DArray.frag | 7 + ..._TestInterpolateDynIdx1DArrayInStruct.frag | 7 + ...st_TestInterpolateDynIdx1DStructArray.frag | 7 + 
..._TestInterpolateDynIdx2DArrayInStruct.frag | 7 + ...terpolateDynIdx2DArrayInStructInArray.frag | 7 + ...st_TestInterpolateDynIdx2DStructArray.frag | 7 + ...pExtInst_TestInterpolateDynIdx3DArray.frag | 7 + ...OpExtInst_TestInterpolateDynIdxVector.frag | 7 + .../OpExtInst_TestInverseMat4_lit.frag | 7 + .../OpExtInst_TestInverseSqrtDouble_lit.frag | 7 + .../OpExtInst_TestInverseSqrtFloat_lit.frag | 7 + ...pExtInst_TestInverseSqrtVec4Const_lit.frag | 7 + .../OpExtInst_TestLdexpDouble_lit.frag | 7 + .../OpExtInst_TestLdexpFloat_lit.frag | 7 + .../OpExtInst_TestLdexpVec4_lit.frag | 7 + .../OpExtInst_TestLengthBasic_lit.frag | 7 + .../OpExtInst_TestLengthDouble_lit.frag | 7 + .../OpExtInst_TestLengthFloat_lit.frag | 7 + .../OpExtInst_TestLengthVec4_lit.frag | 7 + .../OpExtInst_TestLog2Vec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestLog2_lit.frag | 7 + .../OpExtInst_TestLogVec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestLog_lit.frag | 7 + .../OpExtInst_TestMatrixInverseDmat_lit.frag | 7 + .../OpExtInst_TestMatrixInverseMat_lit.frag | 7 + .../OpExtInst_TestMaxBasic_lit.frag | 7 + .../OpExtInst_TestMaxDouble_lit.frag | 7 + .../OpExtInst_TestMaxFloat_lit.frag | 7 + .../extensions/OpExtInst_TestMaxInt_lit.frag | 7 + .../extensions/OpExtInst_TestMaxUint_lit.frag | 7 + .../OpExtInst_TestMinBasic_lit.frag | 7 + .../OpExtInst_TestMinDouble_lit.frag | 7 + .../OpExtInst_TestMinFloat_lit.frag | 7 + .../extensions/OpExtInst_TestMinInt_lit.frag | 7 + .../extensions/OpExtInst_TestMinUint_lit.frag | 7 + .../OpExtInst_TestMixBasic_lit.frag | 7 + ...pExtInst_TestMixLinearBlendDouble_lit.frag | 7 + ...OpExtInst_TestMixLinearBlendFloat_lit.frag | 7 + .../OpExtInst_TestMixSelectDouble_lit.frag | 7 + .../OpExtInst_TestMixSelectFloat_lit.frag | 7 + .../OpExtInst_TestMixSelectInt_lit.frag | 7 + .../OpExtInst_TestMixSelectUint_lit.frag | 7 + .../OpExtInst_TestModfDouble_lit.frag | 7 + .../OpExtInst_TestModfFloat_lit.frag | 7 + .../OpExtInst_TestModfVec4_lit.frag | 7 + 
.../OpExtInst_TestNonSemanticInfo.spvasm | 8 + .../OpExtInst_TestNormalizeDouble_lit.frag | 7 + .../OpExtInst_TestNormalizeFloat_lit.frag | 7 + .../OpExtInst_TestNormalizeVec4_lit.frag | 7 + .../OpExtInst_TestPackDouble2x32_lit.frag | 7 + .../OpExtInst_TestPackHalf2x16_lit.frag | 7 + .../OpExtInst_TestPackSnorm2x16_lit.frag | 7 + .../OpExtInst_TestPackSnorm4x8_lit.frag | 7 + .../OpExtInst_TestPackUnorm2x16_lit.frag | 7 + .../OpExtInst_TestPackUnorm4x8_lit.frag | 7 + .../extensions/OpExtInst_TestPow2_lit.frag | 7 + .../OpExtInst_TestPowVec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestPow_lit.frag | 7 + .../OpExtInst_TestRadiansVec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestRadians_lit.frag | 7 + .../OpExtInst_TestReflectDouble_lit.frag | 7 + .../OpExtInst_TestReflectFloat_lit.frag | 7 + .../OpExtInst_TestReflectVec4_lit.frag | 7 + .../OpExtInst_TestRefractDouble_lit.frag | 7 + .../OpExtInst_TestRefractFloat_lit.frag | 7 + .../OpExtInst_TestRefractVec4_lit.frag | 7 + .../OpExtInst_TestRoundDouble_lit.frag | 7 + .../OpExtInst_TestRoundEvenDouble_lit.frag | 7 + .../OpExtInst_TestRoundEvenFloat_lit.frag | 7 + .../OpExtInst_TestRoundEvenVec4_lit.frag | 7 + .../OpExtInst_TestRoundFloat_lit.frag | 7 + .../OpExtInst_TestRoundVec4_lit.frag | 7 + .../OpExtInst_TestSignDouble_lit.frag | 7 + .../OpExtInst_TestSignFloat_lit.frag | 7 + .../extensions/OpExtInst_TestSignInt_lit.frag | 7 + .../OpExtInst_TestSignIvec4_lit.frag | 7 + .../OpExtInst_TestSignVec4_lit.frag | 7 + .../OpExtInst_TestSinVec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestSin_lit.frag | 7 + .../OpExtInst_TestSinhFloat_lit.frag | 7 + .../extensions/OpExtInst_TestSinh_lit.frag | 7 + .../OpExtInst_TestSmoothStepDouble_lit.frag | 7 + .../OpExtInst_TestSmoothStepFloat_lit.frag | 7 + ...OpExtInst_TestSmoothStepVec4Const_lit.frag | 7 + .../OpExtInst_TestSqrtDouble_lit.frag | 7 + .../OpExtInst_TestSqrtFloat_lit.frag | 7 + .../OpExtInst_TestSqrtVec4Const_lit.frag | 7 + 
.../OpExtInst_TestStepDouble_lit.frag | 7 + .../OpExtInst_TestStepFloat_lit.frag | 7 + .../OpExtInst_TestStepVec4Const_lit.frag | 7 + .../OpExtInst_TestTanVec4Const_lit.frag | 7 + .../extensions/OpExtInst_TestTan_lit.frag | 7 + .../OpExtInst_TestTanhFloat_lit.frag | 7 + .../extensions/OpExtInst_TestTanh_lit.frag | 7 + .../OpExtInst_TestTruncDouble_lit.frag | 7 + .../OpExtInst_TestTruncFloat_lit.frag | 7 + .../OpExtInst_TestTruncVec4_lit.frag | 7 + .../OpExtInst_TestUintBitsToFloat_lit.frag | 7 + .../OpExtInst_TestUnpackDouble2x32_lit.frag | 7 + .../OpExtInst_TestUnpackHalf2x16_lit.frag | 7 + .../OpExtInst_TestUnpackSnorm2x16_lit.frag | 7 + .../OpExtInst_TestUnpackSnorm4x8_lit.frag | 7 + .../OpExtInst_TestUnpackUnorm2x16_lit.frag | 7 + .../OpExtInst_TestUnpackUnorm4x8_lit.frag | 7 + .../OpExtInst_TestinverseMat2-lit.frag | 7 + .../PipelineVsFs_TestAlpha2Coverage.pipe | 8 + ...neVsFs_ViewIndexWithMultiViewDisabled.pipe | 8 + .../GraphicsFuzz_ComputeBlockPressure.spvasm | 8 + ...GraphicsFuzz_FindKillUseAfterPoison.spvasm | 8 + .../fuzzer/GraphicsFuzz_ISelAlignment.spvasm | 8 + ...tAccessChainIndexConvertedFromFloat.spvasm | 8 + ...zz_TestAccessChainUsingInputPointer.spvasm | 8 + .../fuzzer/GraphicsFuzz_TestBVec4.spvasm | 8 + ...phicsFuzz_TestConditionalsAndOpKill.spvasm | 8 + ...phicsFuzz_TestControlFlowInFunction.spvasm | 8 + .../GraphicsFuzz_TestLoopDeepIfLoop.spvasm | 8 + .../GraphicsFuzz_TestLoopNestedIfs.spvasm | 8 + ...phicsFuzz_TestLoopsIfsContinuesCall.spvasm | 8 + ...csFuzz_TestMaxMixConditionalDiscard.spvasm | 8 + .../GraphicsFuzz_TestModFGlColor.spvasm | 8 + .../GraphicsFuzz_TestModFTempColor.spvasm | 8 + .../GraphicsFuzz_TestOpCopyObject.spvasm | 8 + ...uzz_TestOpCopyObjectFromAccessChain.spvasm | 8 + .../GraphicsFuzz_TestOpIAddCarry.spvasm | 8 + ...GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm | 8 + .../fuzzer/GraphicsFuzz_TestOpSNegate.spvasm | 8 + .../GraphicsFuzz_TestSimilarNestedIfs.spvasm | 8 + .../fuzzer/GraphicsFuzz_TestSwitch.spvasm | 8 + 
.../GraphicsFuzz_TestTwoLoopsMatrix.spvasm | 8 + .../GraphicsFuzz_TestTwoLoopsSetStruct.spvasm | 8 + .../GraphicsFuzz_TestTwoLoopsWithBreak.spvasm | 8 + ...csFuzz_UseNotJointlyDominatedByDefs.spvasm | 8 + .../general/AggressiveInvariantLoads.pipe | 8 + .../CallInstAsUserOfGlobalVariable.spvasm | 8 + .../general/CantOptimizePointSizeWrite.pipe | 8 + .../CbShaderMaskWithDummyExport.spvasm | 8 + .../general/CbShaderMaskWithDummyExport4.pipe | 8 + llpc/test/shaderdb/general/CoherentArray.frag | 7 + .../test/shaderdb/general/CoherentVector.frag | 7 + .../shaderdb/general/CsPipelineDumpTest.pipe | 8 + .../shaderdb/general/CsTimerProfileTest.pipe | 8 + .../general/DisableInvariantLoads.pipe | 8 + .../DiscardToDemoteTransformations.frag | 7 + ...ardToDemoteTransformationsNotRequired.frag | 7 + llpc/test/shaderdb/general/ImgDescLoad.comp | 25 + .../general/MeshOutputsToAllocas.mesh | 61 + .../general/MissingResourceNodeTest.pipe | 8 + .../shaderdb/general/NggInCullingMode.pipe | 8 + llpc/test/shaderdb/general/NsaThreshold.pipe | 8 + .../general/OptimizePointSizeWrite.pipe | 8 + .../shaderdb/general/OutputPrimitiveTest.geom | 2 + .../PipelineCs_DebugBreak_intrinsic.pipe | 8 + .../general/PipelineCs_DebugPrintf.pipe | 8 + .../PipelineCs_ForceMemoryBarrierScope.pipe | 8 + .../PipelineCs_LdsSpillLimitDwordsOption.pipe | 8 + .../PipelineCs_MultipleRootInlineBuffer.pipe | 8 + ..._OverrideShaderThreadGroupSize16X16X1.pipe | 8 + ...Cs_OverrideShaderThreadGroupSize8X8X1.pipe | 8 + .../PipelineCs_TestConstImmediateStore.pipe | 8 + .../PipelineCs_TestDynDescNoSpill.pipe | 8 + .../PipelineCs_TestDynDescNoSpill_lit.pipe | 8 + .../general/PipelineCs_TestDynDescSpill.pipe | 8 + .../PipelineCs_TestFetch2DMSFmaskBased.pipe | 8 + ...ipelineCs_TestFetch2DMSFmaskBased_lit.pipe | 8 + .../PipelineCs_TestFetch2DMSFmaskOnly.pipe | 8 + .../PipelineCs_TestInlineConstDirect.pipe | 8 + .../PipelineCs_TestInlineConstDirect_lit.pipe | 8 + .../PipelineCs_TestInlineConstIndirect.pipe | 8 + 
...ipelineCs_TestInlineConstIndirect_lit.pipe | 8 + .../PipelineCs_TestMultiEntryPoint.pipe | 8 + .../PipelineCs_TestMultiEntryPoint_lit.pipe | 8 + .../PipelineGsTess_TestInOutPacking.pipe | 8 + .../PipelineGs_TestViewIndexAndLayer.pipe | 8 + .../PipelineMesh_OutputPackingInLds.pipe | 22 +- ...Mesh_TestMismatchMeshInOutWithAllocas.pipe | 96 + .../PipelineRays_TestLgcRtTraceRayOp.pipe | 8 + .../PipelineTaskMesh_LdsVariables.pipe | 8 + ...Tes_OutputComponentNotReadByNextStage.pipe | 8 + ...elineTcsTes_TestLocMapArrayElemAccess.pipe | 8 + ...neTcsTes_TestLocMapArrayElemDynAccess.pipe | 8 + ...ineTcsTes_TestLocMapLoadBuiltInOutput.pipe | 10 +- ...ineTcsTes_TestLocMapLoadGenericOutput.pipe | 8 + ...ipelineTcsTes_TestLocMapVecCompAccess.pipe | 8 + ...lineTcsTes_TestLocMapVecCompDynAccess.pipe | 8 + ...csTes_TestTessLevelDynIndexForIsoline.pipe | 8 + ...neTcsTes_TestTessLevelDynIndexForQuad.pipe | 8 + ...sTes_TestTessLevelDynIndexForTriangle.pipe | 8 + ...ineTcsTes_TestTessLevelElemForIsoline.pipe | 8 + ...pelineTcsTes_TestTessLevelElemForQuad.pipe | 8 + ...neTcsTes_TestTessLevelElemForTriangle.pipe | 8 + ...ipelineTcsTes_TestTessLevelForIsoline.pipe | 8 + .../PipelineTcsTes_TestTessLevelForQuad.pipe | 8 + ...pelineTcsTes_TestTessLevelForTriangle.pipe | 8 + .../PipelineTess_TestInOutPacking.pipe | 8 + .../PipelineTess_XfbWithManyComponents.pipe | 20 +- .../PipelineVsFs_ColorExportShader.pipe | 8 + .../general/PipelineVsFs_DisableFMA.pipe | 8 + .../PipelineVsFs_DynamicSampleInfo.pipe | 8 + .../general/PipelineVsFs_FsWithData.pipe | 8 + .../general/PipelineVsFs_GlPositionFMF.pipe | 8 + .../PipelineVsFs_MultiTableDescSet.pipe | 8 + .../PipelineVsFs_NullFragmentShader.pipe | 8 + .../PipelineVsFs_PixelShaderSamplesZero.pipe | 8 + ...ipelineVsFs_TestBarycentric_line_list.pipe | 8 + .../PipelineVsFs_TestBarycentric_tri_fan.pipe | 8 + ...PipelineVsFs_TestBarycentric_tri_list.pipe | 8 + .../PipelineVsFs_TestColorFormat_A8.pipe | 8 + .../PipelineVsFs_TestConstImmediateStore.pipe | 
8 + .../PipelineVsFs_TestDualSourceBlend.pipe | 8 + ...s_TestDualSourceBlend_onlyOneRTExport.pipe | 8 + ...pelineVsFs_TestExpWithRGB_UINT_PACK32.pipe | 8 + ...estIgnoreDynamicDualSourceBlendEnable.pipe | 8 + .../PipelineVsFs_TestInOutPacking.pipe | 8 + .../PipelineVsFs_TestIncludeLlvmIr.pipe | 8 + ...pelineVsFs_TestIndirectResourceLayout.pipe | 8 + ...eVsFs_TestInterpAtCentriodBarycentric.pipe | 8 + .../general/PipelineVsFs_TestNullFs.pipe | 12 +- .../PipelineVsFs_TestPervertexVariable.pipe | 8 + .../PipelineVsFs_TestPointerInOut.pipe | 8 + .../PipelineVsFs_TestPrimitiveID_First.pipe | 8 + .../PipelineVsFs_TestPrimitiveID_Last.pipe | 8 + ...pelineVsFs_TestSubpassInputFmaskBased.pipe | 8 + .../general/PipelineVsFs_TestUberShader.pipe | 8 + .../PipelineVsFs_TestVertexDivisor.pipe | 8 + .../PipelineVsFs_TestVertexFetchWithR8G8.pipe | 8 + .../PipelineVsFs_TestViewportIndex.pipe | 8 + .../PipelineVsFs_Test_unused_outputs.pipe | 8 + .../general/PipelineVsFs_VsAndFsWithData.pipe | 8 + .../general/PipelineVsFs_VsWithData.pipe | 8 + .../PipelineVsGsFs_TestDwordPacking.pipe | 8 + .../general/PipelineVsGsFs_TestMergeNode.pipe | 8 + .../general/PipelineVsGs_TestBasicInOut.pipe | 8 + .../PipelineVsGs_TestBuiltinInOut.pipe | 8 + .../PipelineVsPs_TestFetchRGB10A2.pipe | 8 + .../shaderdb/general/PrintOptionsTest.spvasm | 8 + .../ScalarBlockLayoutOptionTest.spvasm | 8 + .../shaderdb/general/ScheduleStrategy.pipe | 29 + .../general/SubgroupShuffleIndexConstant.comp | 25 + .../SubgroupShuffleIndexDivergent.comp | 25 + .../general/SubgroupShuffleIndexUniform.comp | 25 + .../TessInOutWithReadBackOnlyOutputs.pipe | 250 ++ .../general/TestBuiltinFrexpLdexp.comp | 25 + ...estCombineOfMultipleStoreInstructions.frag | 7 + ...ompilationOfNestedStructTaskPayload.spvasm | 8 + .../general/TestComponentIndexing.tese | 2 + .../TestConstantImmStore_FunctionInline.frag | 7 + .../general/TestDeduplicateConstTables.frag | 7 + .../general/TestDeduplicateConstTables.spvasm | 8 + 
...tNumComponentsWithReversedAccessOrder.mesh | 2 + .../shaderdb/general/TestPatchBufferOp.comp | 25 + .../shaderdb/general/TestWorkgroupIdOpt.comp | 25 + .../general/TestWorkgroupMemoryLayout.spvasm | 8 + .../shaderdb/general/UndefVertexOutput.spvasm | 51 +- .../VertexOptimizationLevelTest.spvasm | 8 + .../general/VertexPipelineDumpTest.spvasm | 8 + .../general/VertexTimerProfileTest.spvasm | 8 + .../general/VsFsPipelineDumpTest.pipe | 8 + .../general/VsFsTimerProfileTest.pipe | 8 + .../WorkaroundStorageImageFormats.pipe | 8 + .../general/WorkgroupSizeLiteral.spvasm | 8 + llpc/test/shaderdb/general/outputArray.frag | 26 + .../gfx10/CheckFMFOptions_NoContract.pipe | 8 + .../gfx10/PipelineMergeAttributes_GsVs.pipe | 8 + .../PipelineMergeAttributes_GsVsNgg.pipe | 8 + .../gfx10/PipelineMergeAttributes_HsLs.pipe | 8 + .../PipelineVsFs_TestFetchSingleInputNgg.pipe | 8 + ...ineVsFs_TestSubgroupSizeUsageFragment.pipe | 8 + ...elineVsFs_TestSubgroupSizeUsageVertex.pipe | 8 + .../PipelineVsFs_TestVsOutMiscSideBusEna.pipe | 8 + llpc/test/shaderdb/gfx10/TestWaveSize.comp | 25 + llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe | 8 + llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe | 8 + llpc/test/shaderdb/gfx10/lit.local.cfg | 24 + .../shaderdb/gfx11/AttributePrecedesPos.pipe | 8 + .../gfx11/ExtSubgroup_TestReduction.comp | 25 + llpc/test/shaderdb/gfx11/FlatParamDpp.frag | 7 + llpc/test/shaderdb/gfx11/HalfAttribute.frag | 7 + .../shaderdb/gfx11/SgprUserDataInit_Cs.pipe | 12 +- .../shaderdb/gfx11/SgprUserDataInit_Fs.pipe | 25 +- .../gfx11/TessFactorStoreWithOpt.pipe | 26 +- .../gfx11/TestGdsOperationsForXfb.vert | 25 +- .../shaderdb/gfx11/TestGsXfbWithHole.pipe | 8 + .../cooperativeMatrix/array-of-matrices.comp | 33 +- .../cooperativeMatrix/extract-insert.spvasm | 22 +- .../cooperativeMatrix/loadstore-uvec4.comp | 29 +- llpc/test/shaderdb/gfx11/lit.local.cfg | 25 + .../PipelineRays_TestStaticCompile.pipe | 8 + .../PipelineLibCes_TestColorExport.pipe | 8 + 
.../PipelineLibFs_TestFsLibrary.pipe | 8 + .../PipelineLibVs_TestVsLibrary.pipe | 8 + .../PipelineVsFs_TestGraphicsLibrary.pipe | 8 + .../hlsl/Hlsl_TestCBufferArrayPacking.spvasm | 8 + ...l_TestCBufferArrayPackingFullStruct.spvasm | 8 + ...Hlsl_TestLoadRowMajorMatrixInStruct.spvasm | 8 + ...lsl_TestStoreRowMajorMatrixInStruct.spvasm | 8 + .../hlsl/Hlsl_TestStructuredBuffers.spvasm | 8 + .../SpirvTwoEntryPoints.spvasm | 8 + .../multiple_inputs/test_inputs/Fs1.frag | 7 + .../multiple_inputs/test_inputs/Fs2.frag | 7 + .../PipelineVsFs_ConstantData_Vs1Fs1.pipe | 8 + .../PipelineVsFs_ConstantData_Vs1Fs2.pipe | 8 + .../PipelineVsFs_ConstantData_Vs2Fs1.pipe | 8 + .../multiple_inputs/test_inputs/Vs1.vert | 7 + .../multiple_inputs/test_inputs/Vs2.vert | 7 + .../object/ObjConstant_TestArray_lit.frag | 7 + .../object/ObjConstant_TestElementReuse.frag | 7 + .../object/ObjConstant_TestMatrix_lit.frag | 7 + ...onstant_TestSpecConstantArraySize_lit.vert | 7 + .../ObjConstant_TestSpecConstantOp_lit.vert | 7 + ...Constant_TestSpecialSourceSwizzle_lit.frag | 7 + .../object/ObjConstant_TestStruct_lit.frag | 7 + .../object/ObjFragMask_TestFragFetch_lit.frag | 7 + .../ObjImage_TestCubeAtomicAdd_lit.comp | 25 + .../ObjImage_TestMemoryQualifier_lit.frag | 7 + .../object/ObjInput_TestCsBuiltIn_lit.comp | 25 + .../object/ObjInput_TestDrawParams_lit.vert | 7 + .../object/ObjInput_TestFsBasic_lit.frag | 7 + .../object/ObjInput_TestFsBuiltIn_lit.frag | 7 + .../ObjInput_TestFsCompSpecifier_lit.frag | 7 + .../object/ObjInput_TestFsDouble_lit.frag | 7 + .../object/ObjInput_TestFsInBlock_lit.frag | 7 + ...nput_TestFsInterpQualifierInBlock_lit.frag | 7 + ...put_TestFsInterpQualifierOnStruct_lit.frag | 7 + .../ObjInput_TestFsInterpQualifier_lit.frag | 7 + .../ObjInput_TestFsMatrixArray_lit.frag | 7 + .../object/ObjInput_TestFsMatrix_lit.frag | 7 + ...t_TestFsNonVolatileHelperInvocation.spvasm | 8 + .../object/ObjInput_TestFsStruct_lit.frag | 7 + .../ObjInput_TestFsVectorArray_lit.frag | 7 + 
...nput_TestFsVolatileHelperInvocation.spvasm | 8 + .../object/ObjInput_TestGsBasic_lit.geom | 2 + .../object/ObjInput_TestGsBuiltIn_lit.geom | 2 + .../ObjInput_TestGsCompSpecifier_lit.geom | 2 + ...ut_TestIndexingInterpOfInputArray_lit.frag | 7 + .../object/ObjInput_TestTcsBasic_lit.tesc | 2 + .../object/ObjInput_TestTcsBuiltIn_lit.tesc | 2 + .../ObjInput_TestTcsCompSpecifier_lit.tesc | 2 + ...put_TestTcsLoadEntireInputArray_lit.spvasm | 8 + .../object/ObjInput_TestTcsViewIndex.spvasm | 8 + .../object/ObjInput_TestTesBasic_lit.tese | 2 + .../object/ObjInput_TestTesBuiltIn_lit.tese | 2 + .../ObjInput_TestTesCompSpecifier_lit.tese | 2 + .../ObjInput_TestTesComplexInBlock_lit.tese | 2 + ...jInput_TestTesComplexPatchInBlock_lit.tese | 2 + .../object/ObjInput_TestTesConstExpr_lit.tese | 2 + .../ObjInput_TestUnUsedVariable_lit.comp | 25 + .../object/ObjInput_TestVsBasic_lit.vert | 7 + .../object/ObjInput_TestVsBuiltIn_lit.vert | 7 + .../ObjInput_TestVsCompSpecifier_lit.vert | 7 + .../object/ObjInput_TestVsDouble_lit.vert | 7 + .../object/ObjInput_TestVsMatrix.vert | 7 + .../object/ObjInput_TestVsMatrixArray.vert | 7 + .../ObjInput_TestVsVectorArray_lit.vert | 7 + .../ObjNonUniformIndex_TestLoadRowMajor.comp | 25 + .../object/ObjNonUniform_TestImageSample.frag | 7 + .../ObjNonUniform_TestMinNonUniform.spvasm | 8 + .../object/ObjOutput_TestFsBasic_lit.frag | 7 + .../object/ObjOutput_TestFsBuiltIn_lit.frag | 7 + .../ObjOutput_TestFsCompSpecifier_lit.frag | 7 + .../object/ObjOutput_TestFsNoOut.frag | 7 + .../object/ObjOutput_TestFsVector_lit.frag | 7 + .../object/ObjOutput_TestGsBasic_lit.geom | 2 + .../object/ObjOutput_TestGsBuiltIn_lit.geom | 2 + .../ObjOutput_TestGsCompSpecifier_lit.geom | 2 + .../object/ObjOutput_TestLlpcOpt.frag | 7 + .../shaderdb/object/ObjOutput_TestOpt.frag | 7 + .../object/ObjOutput_TestTcsBasic_lit.tesc | 2 + .../object/ObjOutput_TestTcsBuiltIn_lit.tesc | 2 + .../ObjOutput_TestTcsCompSpecifier_lit.tesc | 2 + 
.../ObjOutput_TestTcsComplexOutBlock_lit.tesc | 2 + ...utput_TestTcsComplexPatchOutBlock_lit.tesc | 2 + .../ObjOutput_TestTcsConstExpr_lit.tesc | 2 + .../object/ObjOutput_TestTesBasic_lit.tese | 2 + .../object/ObjOutput_TestTesBuiltIn_lit.tese | 2 + .../ObjOutput_TestTesCompSpecifier_lit.tese | 2 + .../object/ObjOutput_TestVsBasic_lit.vert | 7 + .../object/ObjOutput_TestVsBuiltIn_lit.vert | 7 + .../ObjOutput_TestVsCompSpecifier_lit.vert | 7 + .../object/ObjOutput_TestVsDouble_lit.vert | 7 + .../object/ObjOutput_TestVsMatrix.vert | 7 + .../object/ObjOutput_TestVsMatrixArray.vert | 7 + .../object/ObjOutput_TestVsNoBuiltIn_lit.vert | 7 + .../object/ObjOutput_TestVsNoGeneric_lit.vert | 7 + .../object/ObjOutput_TestVsOutBlock_lit.vert | 7 + .../object/ObjOutput_TestVsStruct_lit.vert | 7 + .../ObjOutput_TestVsVectorArray_lit.vert | 7 + .../object/ObjPushConst_TestBasic_lit.vert | 7 + .../ObjPushConst_TestNestedStruct_lit.vert | 7 + .../ObjPushConst_TestSpillToMemory_lit.vert | 7 + .../object/ObjPushConstant_TestBasic_lit.frag | 7 + ...hConstant_TestMultiPushConstant_lit.spvasm | 8 + .../object/ObjResource_TestAlias_lit.frag | 7 + .../object/ObjSampler_TestLargeId_lit.frag | 7 + ...Sampler_TestSeparateSamplerShadow_lit.frag | 7 + .../ObjShaderBallot_TestGeneral_lit.comp | 25 + .../ObjSharedVariable_TestArrayCopy_lit.comp | 25 + .../ObjSharedVariable_TestArray_lit.comp | 25 + .../ObjSharedVariable_TestBasic_lit.comp | 25 + .../ObjSharedVariable_TestMatrix_lit.comp | 25 + .../ObjSharedVariable_TestStruct_lit.comp | 25 + .../object/ObjStorageBlock_TestAlign_lit.frag | 7 + .../ObjStorageBlock_TestDirectIndex_lit.frag | 7 + .../ObjStorageBlock_TestDouble_lit.frag | 7 + ...ObjStorageBlock_TestIndirectIndex_lit.frag | 7 + ...bjStorageBlock_TestMatrixInStruct_lit.vert | 7 + .../ObjStorageBlock_TestMemCpyInt16.comp | 25 + .../ObjStorageBlock_TestMemCpyInt32.comp | 25 + .../ObjStorageBlock_TestMemCpyInt8.comp | 25 + .../ObjStorageBlock_TestMemSetInt16.comp | 25 + 
.../ObjStorageBlock_TestMemSetInt32.comp | 25 + .../ObjStorageBlock_TestMemSetInt8.comp | 25 + ...jStorageBlock_TestMemoryQualifier_lit.frag | 7 + ...geBlock_TestMultiLevelAccessChain_lit.vert | 7 + .../ObjStorageBlock_TestOffset_lit.frag | 7 + .../ObjStorageBlock_TestRowMajor_lit.frag | 7 + .../ObjStorageBlock_TestRuntimeArray_lit.vert | 7 + ...StorageBlock_TestStoreBasicDouble_lit.vert | 7 + ...jStorageBlock_TestStoreBasicFloat_lit.vert | 7 + ...ObjStorageBlock_TestStoreBasicInt_lit.vert | 7 + ...bjStorageBlock_TestStoreBasicUint_lit.vert | 7 + ...StorageBlock_TestStoreMatrixArray_lit.vert | 7 + .../ObjStorageBlock_TestStoreMatrix_lit.vert | 7 + ...geBlock_TestStoreMixedMatrixStyle_lit.frag | 7 + ...rageBlock_TestStoreRowMajorMatrix_lit.frag | 7 + ...StorageBlock_TestStoreScalarArray_lit.vert | 7 + .../ObjStorageBlock_TestStoreStruct_lit.vert | 7 + ...orageBlock_TestStoreToMatrixArray_lit.vert | 7 + ...ObjStorageBlock_TestStoreToMatrix_lit.vert | 7 + ...geBlock_TestStoreToRowMajorMatrix_lit.frag | 7 + ...lock_TestStoreToScalarVectorArray_lit.vert | 7 + ...bjStorageBlock_TestStoreVectorArray.spvasm | 8 + ...StorageBlock_TestStoreVectorArray_lit.vert | 7 + ...StorageBlock_TestUseStorageBuffer_lit.frag | 7 + ...ageBlock_TestVectorComponentStore_lit.comp | 25 + .../object/ObjUniformBlock_TestAlign_lit.frag | 7 + .../ObjUniformBlock_TestDirectIndex_lit.frag | 7 + ...ObjUniformBlock_TestIndirectIndex_lit.frag | 7 + ...jUniformBlock_TestLoadBasicDouble_lit.vert | 7 + ...bjUniformBlock_TestLoadBasicFloat_lit.vert | 7 + .../ObjUniformBlock_TestLoadBasicInt_lit.vert | 7 + ...ObjUniformBlock_TestLoadBasicUint_lit.vert | 7 + ...formBlock_TestLoadFromMatrixArray_lit.vert | 7 + ...bjUniformBlock_TestLoadFromMatrix_lit.vert | 7 + ...mBlock_TestLoadFromRowMajorMatrix_lit.frag | 7 + ...ock_TestLoadFromScalarVectorArray_lit.vert | 7 + ...jUniformBlock_TestLoadMatrixArray_lit.vert | 7 + .../ObjUniformBlock_TestLoadMatrix_lit.vert | 7 + ...ormBlock_TestLoadMixedMatrixStyle_lit.frag 
| 7 + ...UniformBlock_TestLoadNestedStruct_lit.vert | 7 + ...iformBlock_TestLoadRowMajorMatrix_lit.frag | 7 + ...jUniformBlock_TestLoadScalarArray_lit.vert | 7 + .../ObjUniformBlock_TestLoadStruct_lit.vert | 7 + ...jUniformBlock_TestLoadVectorArray_lit.vert | 7 + .../ObjUniformBlock_TestOffset_lit.frag | 7 + .../ObjUniformConstant_TestArray_lit.frag | 7 + .../ObjUniformConstant_TestSimple_lit.frag | 7 + .../shaderdb/object/ObjXfb_TestBasic_lit.vert | 7 + .../ray_tracing/PipelineRayquery.pipe | 8 + .../PipelineRays_Continuations.pipe | 20 + ...inuations_IntersectionShaderVgprCount.pipe | 8 + ...Continuations_SpecializeDriverShaders.pipe | 8 + ...inuations_SpecializeDriverShaders_Isa.pipe | 8 + .../PipelineRays_DifferentPayloads.pipe | 166 + .../ray_tracing/PipelineRays_Irreducible.pipe | 8 + .../ray_tracing/PipelineRays_NoPayload.pipe | 194 + ...neRays_SetContinuationsCompileUnified.pipe | 8 + .../PipelineRays_TestLaunchKernel.pipe | 8 + .../PipelineRays_TestLibraryNoTraversal.pipe | 8 + ...eRays_TestRtIgnoreDeclaredPayloadSize.pipe | 8 + .../PipelineRays_TestStaticCompile.pipe | 8 + .../PipelineRays_TestWaveSize.pipe | 8 + .../shaderdb/ray_tracing/TestContState.rchit | 7 + .../shaderdb/ray_tracing/TestContState.rgen | 7 + .../ray_tracing/TestHitAttribute.rint | 7 + .../ray_tracing/TestKnownRayFlags.pipe | 8 + .../ray_tracing/TestPayloadSizes.rgen | 7 + .../ray_tracing/TestProcessGpuRtLibrary.rgen | 7 + llpc/test/shaderdb/ray_tracing/lit.local.cfg | 25 + .../shaderdb/ray_tracing/standalone.rahit | 7 + .../shaderdb/ray_tracing/standalone.rcall | 7 + .../shaderdb/ray_tracing/standalone.rchit | 7 + .../test/shaderdb/ray_tracing/standalone.rgen | 7 + .../test/shaderdb/ray_tracing/standalone.rint | 7 + .../shaderdb/ray_tracing/standalone.rmiss | 2 + .../DescPtrSingleSelect.spvasm | 8 + .../PipelineCs_PipelineCacheHit.pipe | 8 + .../PipelineCs_TestUnsupportedShader.pipe | 8 + .../PipelineGs_BasicRelocGsTest.pipe | 8 + ...elineTess_RelocRemoveUnusedTcsOutputs.pipe | 8 + 
...ipelineVsFs_CheckFloatModeFlushToZero.pipe | 8 + .../PipelineVsFs_CheckFloatModePreserve.pipe | 8 + .../PipelineVsFs_EnableColorExport.pipe | 8 + .../PipelineVsFs_FillPsInput.pipe | 8 + .../PipelineVsFs_ImmutableSampler.pipe | 8 + .../PipelineVsFs_MultiDwordPushConst.pipe | 8 + .../PipelineVsFs_MultipleConstData.pipe | 8 + .../PipelineVsFs_PipelineCacheHit.pipe | 8 + .../PipelineVsFs_RelocCheckPsInControl.pipe | 8 + .../PipelineVsFs_RelocMultiView.pipe | 8 + .../PipelineVsFs_ShadowDescTable.pipe | 8 + ...elineVsFs_ShadowDescTableMissingFmask.pipe | 8 + ...elineVsFs_TestRelocatableInOutMapping.pipe | 8 + ...Fs_TestRelocatableSeparateCompilation.pipe | 8 + .../PipelineVsPs_BuiltinExportInPrologue.pipe | 8 + .../PipelineVsPs_PsInput.pipe | 8 + ...ngleVs_CheckNoteSectionForCacheHash.spvasm | 8 + .../relocatable_shaders/VsGs_Reloc.spvasm | 8 + .../relocatable_shaders/lit.local.cfg | 24 + llpc/test/shaderdb/xdl/lit.local.cfg | 33 + .../UpdateTestChecks/Inputs/base_test.pipe | 91 + .../Inputs/check_attributes.pipe.expected | 93 + .../Inputs/check_globals.pipe.expected | 109 + .../Inputs/check_globals_all.pipe.expected | 109 + .../Inputs/check_globals_smart.pipe.expected | 94 + .../Inputs/check_pal_metadata.pipe | 365 ++ .../Inputs/check_pal_metadata.pipe.expected | 365 ++ .../Inputs/function.pipe.expected | 68 + .../Inputs/function_signature.pipe.expected | 93 + .../Inputs/generalize_calls.base.lgc | 94 + .../Inputs/generalize_calls.lgc.expected | 128 + .../Inputs/no_generalize_calls.lgc.expected | 128 + .../Inputs/stable_ir_values.pipe | 90 + .../Inputs/stable_ir_values.pipe.expected | 91 + .../stable_ir_values.pipe.expected.reset | 91 + .../UpdateTestChecks/check_attributes.test | 2 + .../UpdateTestChecks/check_function.test | 2 + .../check_function_signature.test | 2 + .../tools/UpdateTestChecks/check_globals.test | 6 + .../UpdateTestChecks/check_pal_metadata.test | 3 + .../test/tools/UpdateTestChecks/lit.local.cfg | 64 + .../UpdateTestChecks/stable_ir_values.test | 
5 + .../stable_ir_values_reset.test | 2 + llpc/tool/amdllpc.cpp | 12 +- llpc/tool/llpcAutoLayout.cpp | 14 +- llpc/translator/lib/SPIRV/SPIRVReader.cpp | 625 ++- llpc/translator/lib/SPIRV/SPIRVReader.h | 24 +- .../lib/SPIRV/SPIRVToLLVMDbgTran.cpp | 17 +- .../lib/SPIRV/libSPIRV/SPIRV.debug.h | 2 + .../lib/SPIRV/libSPIRV/SPIRVErrorEnum.h | 2 + .../lib/SPIRV/libSPIRV/SPIRVInstruction.h | 33 +- .../lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +- .../lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 2 + llpc/unittests/CMakeLists.txt | 2 +- llpc/unittests/lit.site.cfg.py.in | 25 + llvmraytracing/CMakeLists.txt | 16 +- llvmraytracing/include/lgc/GpurtDialect.td | 10 +- llvmraytracing/include/lgc/LgcCpsDialect.h | 20 +- llvmraytracing/include/lgc/LgcIlCpsDialect.td | 6 +- .../include/llvmraytracing/Continuations.h | 2 +- .../llvmraytracing/ContinuationsUtil.h | 3 + .../include/llvmraytracing/CpsStackLowering.h | 6 +- .../include/llvmraytracing/LowerRayQuery.h | 8 +- llvmraytracing/lib/CleanupContinuations.cpp | 38 +- llvmraytracing/lib/ContStateBuilder.cpp | 71 +- llvmraytracing/lib/Continuations.cpp | 16 +- llvmraytracing/lib/CpsStackLowering.cpp | 8 +- .../lib/DXILContLgcRtOpConverter.cpp | 89 +- llvmraytracing/lib/DXILContPostProcess.cpp | 7 +- .../lib/DXILContPrepareGpurtLibrary.cpp | 6 +- llvmraytracing/lib/DXILEnums.h | 164 + llvmraytracing/lib/LgcCpsDialect.cpp | 58 +- llvmraytracing/lib/LgcCpsJumpInliner.cpp | 4 +- llvmraytracing/lib/LowerAwait.cpp | 4 +- llvmraytracing/lib/LowerRayQuery.cpp | 4 +- .../lib/LowerRaytracingPipeline.cpp | 126 +- llvmraytracing/lib/RematSupport.cpp | 4 +- .../lib/SpecializeDriverShaders.cpp | 10 +- llvmraytracing/test/CMakeLists.txt | 3 +- .../test/intrinsics/discard-values.ll | 24 + .../intrinsics/get-func-addr-not-found.ll | 25 + .../test/intrinsics/get-func-addr.ll | 30 +- .../test/intrinsics/shader-start.ll | 36 +- .../lgccps/CpsLowering/continuation-basic.ll | 24 + .../lgccps/CpsLowering/cps-entry-point.ll | 24 + 
.../lgccps/CpsLowering/cps-from-continufy.ll | 24 + .../cps-stack-lowering-spirv-global.ll | 268 ++ .../lgccps/CpsLowering/cps-stack-lowering.ll | 24 + .../lgccps/CpsLowering/cps-unify-exits.ll | 24 + llvmraytracing/test/lgccps/alloca-select.ll | 24 + llvmraytracing/test/lgccps/await-if-else.ll | 24 + llvmraytracing/test/lgccps/await-if.ll | 24 + llvmraytracing/test/lgccps/await-in-loop.ll | 24 + .../test/lgccps/call-shader-i1-payload.ll | 28 +- .../test/lgccps/cleanup-store-loads.ll | 24 + llvmraytracing/test/lgccps/cps-no-await.ll | 24 + .../test/lgccps/entry-point-with-cps.ll | 24 + .../cont-payload-registers-get-i32.ll | 26 +- .../cont-payload-registers-i32-count.ll | 26 +- .../cont-payload-registers-set-i32.ll | 26 +- llvmraytracing/test/lgccps/multiple-await.ll | 24 + .../test/lgccps/simple-await-more-state.ll | 24 + llvmraytracing/test/lgccps/simple-await.ll | 24 + .../lgccps/traversal-padding-hitattr-size.ll | 44 +- llvmraytracing/test/lit.cfg.py | 25 + llvmraytracing/test/lit.site.cfg.py.in | 25 + llvmraytracing/unittests/CMakeLists.txt | 3 +- llvmraytracing/unittests/lit.cfg.py | 25 + llvmraytracing/unittests/lit.site.cfg.py.in | 25 + sharedme/xdl/CMakeLists.txt | 93 + sharedme/xdl/include/lgc/CooperativeMatrix.td | 294 ++ sharedme/xdl/include/lgc/LgcXdlDialect.h | 37 + sharedme/xdl/include/lgc/LgcXdlDialect.td | 62 + sharedme/xdl/include/lgc/LgcXdlTypes.h | 85 + sharedme/xdl/include/lgc/RowAccumulator.td | 171 + sharedme/xdl/include/lgc/Sparse.td | 78 + sharedme/xdl/include/xdl/util/ElementType.h | 66 + sharedme/xdl/lib/LgcXdlDialect.cpp | 36 + sharedme/xdl/test/CMakeLists.txt | 46 + sharedme/xdl/test/lit.cfg.py | 73 + sharedme/xdl/test/lit.site.cfg.py.in | 61 + sharedme/xdl/util/ElementType.cpp | 239 ++ .../subgroupshuffle-index-constant.amber | 72 + .../amber/subgroupshuffle-index-uniform.amber | 81 + test/lit.cfg.py | 25 + test/lit.site.cfg.py.in | 25 + tool/UpdateTestChecks/common.py | 3655 +++++++++++------ tool/dumper/CMakeLists.txt | 10 +- 
tool/dumper/vkgcPipelineDumper.cpp | 76 + tool/dumper/vkgcPipelineDumper.h | 4 +- tool/update_llpc_test_checks.py | 699 ++-- tool/vfx/vfxVkSection.cpp | 6 +- tool/vfx/vfxVkSection.h | 3 + util/gpurtshim/CMakeLists.txt | 4 +- util/vkgcUtil.cpp | 56 + util/vkgcUtil.h | 5 +- version/CMakeLists.txt | 3 + version/include/llpc/GfxRuntimeCommon.hlsli | 67 + version/include/llpc/GpurtEnums.h | 12 +- version/include/llpc/GpurtIntrinsics.h | 30 +- version/include/llpcVersion.h.in | 4 +- 1646 files changed, 30645 insertions(+), 6676 deletions(-) create mode 100644 cmake/DialectsTablegen.cmake create mode 100644 cmake/FindShaderDbg.cmake create mode 100644 cmake/llpcdeps.cmake create mode 100644 cmake/sharedme/xdl.cmake create mode 100644 compilerutils/include/public/.clang-format create mode 100644 compilerutils/include/public/AmdExtD3DShaderIntrinsics.h create mode 100644 lgc/include/lgc/util/BufferResource.h create mode 100644 lgc/test/Transforms/CpsLowering/bad-max-argument-vgprs.lgc create mode 100644 lgc/test/Transforms/CpsLowering/cps-unify-exits-no-iww.lgc create mode 100644 lgc/test/Transforms/CpsLowering/missing-max-argument-vgprs.lgc create mode 100644 lgc/test/Transforms/LowerGpuRt/lower-gpurt-dialect-op.lgc create mode 100644 lgc/util/BufferResource.cpp create mode 100644 llpc/test/shaderdb/general/MeshOutputsToAllocas.mesh create mode 100644 llpc/test/shaderdb/general/PipelineMesh_TestMismatchMeshInOutWithAllocas.pipe create mode 100644 llpc/test/shaderdb/general/ScheduleStrategy.pipe create mode 100644 llpc/test/shaderdb/general/TessInOutWithReadBackOnlyOutputs.pipe create mode 100644 llpc/test/shaderdb/general/outputArray.frag create mode 100644 llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe create mode 100644 llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe create mode 100644 llpc/test/shaderdb/xdl/lit.local.cfg create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe create mode 100644 
llpc/test/tools/UpdateTestChecks/Inputs/check_attributes.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/check_globals.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/check_globals_all.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/check_globals_smart.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/function.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/function_signature.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.base.lgc create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.lgc.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/no_generalize_calls.lgc.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected create mode 100644 llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected.reset create mode 100644 llpc/test/tools/UpdateTestChecks/check_attributes.test create mode 100644 llpc/test/tools/UpdateTestChecks/check_function.test create mode 100644 llpc/test/tools/UpdateTestChecks/check_function_signature.test create mode 100644 llpc/test/tools/UpdateTestChecks/check_globals.test create mode 100644 llpc/test/tools/UpdateTestChecks/check_pal_metadata.test create mode 100644 llpc/test/tools/UpdateTestChecks/lit.local.cfg create mode 100644 llpc/test/tools/UpdateTestChecks/stable_ir_values.test create mode 100644 llpc/test/tools/UpdateTestChecks/stable_ir_values_reset.test create mode 100644 llvmraytracing/lib/DXILEnums.h create mode 100644 llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering-spirv-global.ll create mode 100644 
sharedme/xdl/CMakeLists.txt create mode 100644 sharedme/xdl/include/lgc/CooperativeMatrix.td create mode 100644 sharedme/xdl/include/lgc/LgcXdlDialect.h create mode 100644 sharedme/xdl/include/lgc/LgcXdlDialect.td create mode 100644 sharedme/xdl/include/lgc/LgcXdlTypes.h create mode 100644 sharedme/xdl/include/lgc/RowAccumulator.td create mode 100644 sharedme/xdl/include/lgc/Sparse.td create mode 100644 sharedme/xdl/include/xdl/util/ElementType.h create mode 100644 sharedme/xdl/lib/LgcXdlDialect.cpp create mode 100644 sharedme/xdl/test/CMakeLists.txt create mode 100644 sharedme/xdl/test/lit.cfg.py create mode 100644 sharedme/xdl/test/lit.site.cfg.py.in create mode 100644 sharedme/xdl/util/ElementType.cpp create mode 100644 test/amber/subgroupshuffle-index-constant.amber create mode 100644 test/amber/subgroupshuffle-index-uniform.amber create mode 100644 version/include/llpc/GfxRuntimeCommon.hlsli diff --git a/CMakeLists.txt b/CMakeLists.txt index 669411ac49..170fa9144c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,10 +23,24 @@ # ####################################################################################################################### +# +# CMake file for top level of LLPC repository. +# +# Some variables that the client might set for enabling components: +# +# - ICD_BUILD_LLPC: Enable LLPC on Vulkan and OGLP. 
+# +# +# +# +# + cmake_minimum_required(VERSION 3.21) project(LLPCrepo LANGUAGES C CXX) +set(LLPC_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + ### Standalone LLPC build handling if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(cmake/CompilerStandalone.cmake) @@ -38,6 +52,8 @@ endif() include(cmake/llpc_version.cmake) add_llpc_version_projects() +include(CMakeDependentOption) + ### Cached Project Options ############################################################################################# option(LLPC_BUILD_TOOLS "LLPC build all tools" OFF) @@ -51,6 +67,8 @@ option(LLPC_AMD_YUV_IMAGE "Build with AMD_YUV_IMAGE" OFF) option(LLPC_AMD_LVR_INTEROP "Build with AMD_LVR_INTEROP" OFF) #endif +cmake_dependent_option(LLPC_SHAREDME_XDL_BUILD_TEST "Build tests for xdl." ON "LLPC_BUILD_TOOLS" OFF) + ### VKGC aspects ################################################################### # For drivers that use VKGC, as the interface to the LLPC front-end if (FALSE @@ -59,11 +77,12 @@ if (FALSE include("cmake/vkgc.cmake") endif() -### LGC for LLPC ################################################################### +### LLPC dependencies ############################################################## if (ICD_BUILD_LLPC) - # Add LGC and its dependencies as LLVM external projects for LLPC to use. - include("cmake/lgc.cmake") - add_lgc_projects() + # Add LLPC's dependencies (including LGC) as LLVM external projects. This needs + # to be done before LLVM, whereas LLPC itself needs to be done after LLVM. + include("cmake/llpcdeps.cmake") + add_llpcdeps_projects() endif() ### LLVM ########################################################################### @@ -153,10 +172,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET llvm-dialects-example PROPERTY FOLDER Misc) endif() if (NOT WIN32 AND LLVM_OPTIMIZED_TABLEGEN) -#if _WIN32 - # These targets don't exist on Windows when CMake is first invoked. 
- # They are created later at build time, when the cross-compilation takes place. -#endif set_property(TARGET llvm_nm_target PROPERTY FOLDER Misc) set_property(TARGET llvm_readobj_target PROPERTY FOLDER Misc) set_property(TARGET llvm-min-tblgen-host PROPERTY FOLDER Misc) @@ -164,12 +179,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET CONFIGURE_LLVM_NATIVE PROPERTY FOLDER Misc) set_property(TARGET CREATE_LLVM_NATIVE PROPERTY FOLDER Misc) endif() -#if _WIN32 - if(MSVC) - # The 32-bit llvm-tblgen can easily run out of memory. Tell the linker to allow addresses larger than 2GB. - set_property(TARGET llvm-tblgen PROPERTY LINK_FLAGS "/LARGEADDRESSAWARE") - endif() -#endif if (LLPC_BUILD_TESTS) if(NOT LLPC_IS_STANDALONE) set_property(TARGET check-amber PROPERTY FOLDER "LLPC Tests") @@ -181,4 +190,10 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET check-lgc-units PROPERTY FOLDER "LGC Tests") endif() endif() + + if (TARGET llvm-tblgen) + # The 32-bit llvm-tblgen can easily run out of memory. Tell the linker to allow addresses larger than 2GB. + set_property(TARGET llvm-tblgen PROPERTY LINK_FLAGS "/LARGEADDRESSAWARE") + endif() + endif() diff --git a/cmake/CompilerFlags.cmake b/cmake/CompilerFlags.cmake index 258bd173fd..cf2e035a99 100644 --- a/cmake/CompilerFlags.cmake +++ b/cmake/CompilerFlags.cmake @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -23,7 +23,12 @@ # ####################################################################################################################### -function(set_compiler_options PROJECT_NAME ENABLE_WERROR) +option(LLPC_ENABLE_WERROR "Build LLPC with more errors" OFF) + +# Function to set standard compiler options on the specified target. +# There is an optional second arg to specify whether to apply -Werror; if the arg is not given, +# then that is controlled by LLPC_ENABLE_WERROR. +function(set_compiler_options PROJECT_NAME) target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_20) set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20 @@ -31,6 +36,11 @@ function(set_compiler_options PROJECT_NAME ENABLE_WERROR) CXX_EXTENSIONS OFF POSITION_INDEPENDENT_CODE ON) + set(ENABLE_WERROR "${ARGN}") + if("${ENABLE_WERROR}" STREQUAL "") + set(ENABLE_WERROR "${LLPC_ENABLE_WERROR}") + endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") if(ENABLE_WERROR) target_compile_options("${PROJECT_NAME}" PRIVATE diff --git a/cmake/CompilerStandalone.cmake b/cmake/CompilerStandalone.cmake index d7f42eea16..9a54000f82 100644 --- a/cmake/CompilerStandalone.cmake +++ b/cmake/CompilerStandalone.cmake @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -29,48 +29,54 @@ if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif(COMMAND cmake_policy) -set(ICD_BUILD_LLPC ON) -set(LLPC_BUILD_TESTS ON) -set(LLPC_BUILD_TOOLS ON) - -set(XGL_VKGC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - -# This is so spvgen can find vfx in an LLPC standalone build. -set(XGL_LLPC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../pal) - set(XGL_PAL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../pal) -else() - set(XGL_PAL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../../imported/pal) + set(PAL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../pal) +elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../../imported/pal) + set(PAL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../imported/pal) endif() if(EXISTS ${PROJECT_SOURCE_DIR}/../third_party) set(THIRD_PARTY ${PROJECT_SOURCE_DIR}/../third_party) -else() +elseif(EXISTS ${PROJECT_SOURCE_DIR}/../../../../third_party) set(THIRD_PARTY ${PROJECT_SOURCE_DIR}/../../../../third_party) endif() -set(XGL_METROHASH_PATH ${THIRD_PARTY}/metrohash CACHE PATH "The path of metrohash.") -set(XGL_CWPACK_PATH ${THIRD_PARTY}/cwpack CACHE PATH "The path of cwpack.") -add_subdirectory(${XGL_METROHASH_PATH} ${PROJECT_BINARY_DIR}/metrohash) -add_subdirectory(${XGL_CWPACK_PATH} ${PROJECT_BINARY_DIR}/cwpack) - -# External Vulkan headers path -if(EXISTS ${PROJECT_SOURCE_DIR}/../Vulkan-Headers) - set(VULKAN_HEADERS_PATH ${PROJECT_SOURCE_DIR}/../Vulkan-Headers CACHE PATH "The path of Vulkan headers.") +if (THIRD_PARTY) + set(XGL_METROHASH_PATH ${THIRD_PARTY}/metrohash CACHE PATH "The path of metrohash.") + set(XGL_CWPACK_PATH ${THIRD_PARTY}/cwpack CACHE PATH "The path of cwpack.") + add_subdirectory(${XGL_METROHASH_PATH} ${PROJECT_BINARY_DIR}/metrohash) + add_subdirectory(${XGL_CWPACK_PATH} ${PROJECT_BINARY_DIR}/cwpack) endif() -### Khronos Interface 
-add_library(khronos_vulkan_interface INTERFACE) -if(EXISTS ${VULKAN_HEADERS_PATH}) - target_include_directories(khronos_vulkan_interface INTERFACE ${VULKAN_HEADERS_PATH}/include) - target_compile_definitions(khronos_vulkan_interface INTERFACE EXTERNAL_VULKAN_HEADERS=1) +# Enable LLPC if we found its prerequisites (and it is not explicitly disabled). +if (NOT DEFINED ICD_BUILD_LLPC) + if (THIRD_PARTY AND PAL_SOURCE_DIR) + set(ICD_BUILD_LLPC ON) + else() + message(STATUS "Vulkan-LLPC prerequisites not found; disabling") + endif() endif() -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../xgl) - target_include_directories(khronos_vulkan_interface INTERFACE ../xgl/icd/api/include/khronos) -else() - target_include_directories(khronos_vulkan_interface INTERFACE ../../../icd/api/include/khronos) +if (ICD_BUILD_LLPC) + set(LLPC_BUILD_TESTS ON) + set(LLPC_BUILD_TOOLS ON) + + # External Vulkan headers path + if(EXISTS ${PROJECT_SOURCE_DIR}/../Vulkan-Headers) + set(VULKAN_HEADERS_PATH ${PROJECT_SOURCE_DIR}/../Vulkan-Headers CACHE PATH "The path of Vulkan headers.") + endif() + + ### Khronos Interface + add_library(khronos_vulkan_interface INTERFACE) + if(EXISTS ${VULKAN_HEADERS_PATH}) + target_include_directories(khronos_vulkan_interface INTERFACE ${VULKAN_HEADERS_PATH}/include) + target_compile_definitions(khronos_vulkan_interface INTERFACE EXTERNAL_VULKAN_HEADERS=1) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../xgl) + target_include_directories(khronos_vulkan_interface INTERFACE ../xgl/icd/api/include/khronos) + else() + target_include_directories(khronos_vulkan_interface INTERFACE ../../../icd/api/include/khronos) + endif() endif() -set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION 9999 CACHE STRING "") -set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION "LLPC_INTERFACE_MAJOR_VERSION") -set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 9999 CACHE STRING "") +set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION 999999) +set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION 999999) +set(PAL_CLIENT_INTERFACE_MAJOR_VERSION 999999) 
diff --git a/cmake/DialectsTablegen.cmake b/cmake/DialectsTablegen.cmake new file mode 100644 index 0000000000..4fb2e6b3eb --- /dev/null +++ b/cmake/DialectsTablegen.cmake @@ -0,0 +1,39 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + +# TableGen for dialects +macro(set_dialects_tablegen_exe DIALECTS) + set(${DIALECTS}_TABLEGEN_TARGET llvm-dialects-tblgen) + if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) + set(${DIALECTS}_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) + else() + if(CMAKE_CROSSCOMPILING) + set(${DIALECTS}_TABLEGEN_TARGET ${LLVM_DIALECTS_TABLEGEN_TARGET_HOST}) + set(${DIALECTS}_TABLEGEN_EXE ${LLVM_DIALECTS_TABLEGEN_EXE_HOST}) + else() + set(${DIALECTS}_TABLEGEN_EXE $<TARGET_FILE:llvm-dialects-tblgen>) + endif() + endif() +endmacro() diff --git a/cmake/FindShaderDbg.cmake b/cmake/FindShaderDbg.cmake new file mode 100644 index 0000000000..56052352f2 --- /dev/null +++ b/cmake/FindShaderDbg.cmake @@ -0,0 +1,49 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +include(FindPackageHandleStandardArgs) + +set(_SDB_SEARCHES + $ENV{SHADERDBG_DEPTH} + ${GLOBAL_ROOT_SRC_DIR}drivers/ShaderDbg + ${LLPC_SOURCE_DIR}/imported/ShaderDbg + ${LLPC_SOURCE_DIR}/../ShaderDbg + ) + +find_path(SHADERDBG_PATH inc/shaderDbg.h + PATHS ${_SDB_SEARCHES} ENV SHADERDBG_PATH + NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT_PATH) + +find_package_handle_standard_args(ShaderDbg DEFAULT_MSG SHADERDBG_PATH) + +if(SHADERDBG_FOUND) + set(SHADERDBG_INCLUDE_DIRS ${SHADERDBG_PATH}/inc) + set(SDL_BUILD_TOOLS FALSE) + + # Only add the ShaderDbg subdirectory if we haven't already seen it. + if (NOT TARGET ShaderDbg) + add_subdirectory(${SHADERDBG_PATH} ${CMAKE_CURRENT_BINARY_DIR}/ShaderDbg) + endif() +endif() diff --git a/cmake/llpc_version.cmake b/cmake/llpc_version.cmake index ee029391bc..5bd5265778 100644 --- a/cmake/llpc_version.cmake +++ b/cmake/llpc_version.cmake @@ -48,7 +48,7 @@ function(llpc_set_property target scope varName default propertyName) endif() # For an LLPC_ variable, cache it as an option so that GPURT can see it. if ("${varName}" MATCHES "^LLPC_") - option(${varName} "Support ${varName}?" ${${varName}}) + set(${varName} "${${varName}}" CACHE BOOL "Support ${varName}?" 
FORCE) endif() set(${varName} ${${varName}} PARENT_SCOPE) endif() diff --git a/cmake/llpcdeps.cmake b/cmake/llpcdeps.cmake new file mode 100644 index 0000000000..7c4928e9e6 --- /dev/null +++ b/cmake/llpcdeps.cmake @@ -0,0 +1,38 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/..") + +include("${LLPC_SOURCE_DIR}/cmake/lgc.cmake") +include("${LLPC_SOURCE_DIR}/cmake/sharedme/xdl.cmake") + +# Macro to add LLPC's dependencies as LLVM external projects. 
+# The function appends the project names to LLVM_EXTERNAL_PROJECTS and sets each +# LLVM_EXTERNAL_*_SOURCE_DIR, all in the caller's scope. +# LLPC itself is not an LLVM external project, so is not added here. +macro(add_llpcdeps_projects) + add_lgc_projects() + add_sharedme_xdl_projects() +endmacro() diff --git a/cmake/llvm.cmake b/cmake/llvm.cmake index 107824b670..7a888fb9ef 100644 --- a/cmake/llvm.cmake +++ b/cmake/llvm.cmake @@ -45,25 +45,24 @@ set(LLVM_INCLUDE_UTILS ON CACHE BOOL "LLVM include utils") set(LLVM_ENABLE_TERMINFO OFF CACHE BOOL "LLVM enable terminfo") set(LLVM_RAM_PER_TABLEGEN_JOB 4000 CACHE STRING "LLVM RAM per tablegen job") set(LLVM_RAM_PER_LINK_JOB 5000 CACHE STRING "LLVM RAM per link job") -if("${CMAKE_BUILD_TYPE}" STREQUAL Debug) +if("${CMAKE_BUILD_TYPE}" STREQUAL Debug OR CMAKE_CONFIGURATION_TYPES) # Build optimized version of llvm-tblgen even in debug builds, for faster build times. set(LLVM_OPTIMIZED_TABLEGEN ON CACHE BOOL "Build optimized llvm-tblgen") -#if _WIN32 - if(LLVM_OPTIMIZED_TABLEGEN AND WIN32 AND (CMAKE_GENERATOR MATCHES "Ninja")) - # LLVM implements the Release build of llvm-tblgen as a cross-compile target, which fails to find - # our DK-based toolchain (created with amd_generate_msvc_toolchain). However, we can inject the toolchain - # argument into LLVM's add_custom_target that sets up this cross-compile build. - # See: llvm-project/llvm/cmake/modules/CrossCompile.cmake - set(CROSS_TOOLCHAIN_FLAGS_NATIVE "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" CACHE STRING - "Toolchain flags for native build" FORCE) + if(LLVM_OPTIMIZED_TABLEGEN AND WIN32) + if(CMAKE_GENERATOR MATCHES "Ninja") + set(CROSS_TOOLCHAIN_FLAGS_NATIVE "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" CACHE STRING + "Toolchain flags for native build" FORCE) - # Fail early to avoid the dreaded -ologo error. - if(CMAKE_VERSION VERSION_LESS "3.27") - message(FATAL_ERROR "Using LLVM_OPTIMIZED_TABLEGEN in a Debug build requires CMake 3.27 or higher." 
- " The current CMake version is ${CMAKE_VERSION}.") + # Fail early to avoid the dreaded -ologo error. + if(CMAKE_VERSION VERSION_LESS "3.27") + message(FATAL_ERROR "Using LLVM_OPTIMIZED_TABLEGEN in a Debug build requires CMake 3.27 or higher." + " The current CMake version is ${CMAKE_VERSION}.") + endif() + else() + list(APPEND LLVM_CROSS_TOOLCHAIN_FLAGS_NATIVE "-D Python3_ROOT_DIR=${Python3_ROOT_DIR}") + set(CROSS_TOOLCHAIN_FLAGS_NATIVE ${LLVM_CROSS_TOOLCHAIN_FLAGS_NATIVE} CACHE STRING "" FORCE) endif() endif() -#endif endif() # This will greatly speed up debug builds because we won't be listing all the symbols with llvm-nm. @@ -90,15 +89,19 @@ list(APPEND CMAKE_MODULE_PATH "${LLPC_LLVM_BUILD_PATH}/${CMAKE_CFG_INTDIR}/lib/cmake/llvm" # Workaround for VS generator with older LLVM. ) -# Export LLVM build path for client driver. -# TODO: Change uses to LLPC_LLVM_BUILD_PATH. -set(XGL_LLVM_BUILD_PATH ${LLPC_LLVM_BUILD_PATH} PARENT_SCOPE) +if (NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + # Export LLVM build path for client driver. + # TODO: Change uses to LLPC_LLVM_BUILD_PATH. + set(XGL_LLVM_BUILD_PATH ${LLPC_LLVM_BUILD_PATH} PARENT_SCOPE) +endif() # Extract LLVM revision number for code outside the LLPC repository to use. file(READ "${LLPC_LLVM_SRC_PATH}/include/llvm/Config/llvm-config.h.cmake" LLVM_CONFIG_HEADER) string(REGEX MATCH "#define LLVM_MAIN_REVISION ([0-9]+)" "\\1" _ "${LLVM_CONFIG_HEADER}") set(LLVM_MAIN_REVISION "${CMAKE_MATCH_1}") -set(LLVM_MAIN_REVISION ${LLVM_MAIN_REVISION} PARENT_SCOPE) +if (NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLVM_MAIN_REVISION ${LLVM_MAIN_REVISION} PARENT_SCOPE) +endif() # Some of the games using old versions of the tcmalloc lib are crashing # when allocating aligned memory. 
C++17 enables aligned new by default, diff --git a/cmake/sharedme/xdl.cmake b/cmake/sharedme/xdl.cmake new file mode 100644 index 0000000000..459f9bbfc1 --- /dev/null +++ b/cmake/sharedme/xdl.cmake @@ -0,0 +1,34 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../..") + +# Macro to add linear algebra as LLVM external projects. 
+macro(add_sharedme_xdl_projects) + if (NOT sharedme_xdl IN_LIST LLVM_EXTERNAL_PROJECTS) + list(APPEND LLVM_EXTERNAL_PROJECTS sharedme_xdl) + set(LLVM_EXTERNAL_SHAREDME_XDL_SOURCE_DIR "${LLPC_SOURCE_DIR}/sharedme/xdl") + endif() +endmacro() diff --git a/compilerutils/include/compilerutils/ArgPromotion.h b/compilerutils/include/compilerutils/ArgPromotion.h index 736fef2dc4..b99edfa6e5 100644 --- a/compilerutils/include/compilerutils/ArgPromotion.h +++ b/compilerutils/include/compilerutils/ArgPromotion.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -32,7 +32,7 @@ class Function; class SmallBitVector; } // namespace llvm -namespace CompilerUtils { +namespace compilerutils { // Replace struct return type with its first element type. llvm::Function *unpackStructReturnType(llvm::Function *Fn); @@ -44,4 +44,4 @@ llvm::Function *lowerStructRetArgument(llvm::Function *Fn); // and using pointee type metadata. llvm::Function *promotePointerArguments(llvm::Function *Fn, const llvm::SmallBitVector &PromotionMask); -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/compilerutils/CompilerUtils.h b/compilerutils/include/compilerutils/CompilerUtils.h index 3e3d762527..a1ef341da8 100644 --- a/compilerutils/include/compilerutils/CompilerUtils.h +++ b/compilerutils/include/compilerutils/CompilerUtils.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -33,6 +33,7 @@ #define COMPILERUTILS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Attributes.h" @@ -44,7 +45,7 @@ class PassBuilder; } // namespace llvm -namespace CompilerUtils { +namespace compilerutils { // Register compiler utils passes. void RegisterPasses(llvm::PassBuilder &PB); @@ -165,7 +166,26 @@ llvm::Value *simplifyingCreateConstGEP1_32(llvm::IRBuilder<> &builder, llvm::Typ // Create an inbounds GEP if idx is non-null, otherwise return the pointer. llvm::Value *simplifyingCreateConstInBoundsGEP1_32(llvm::IRBuilder<> &builder, llvm::Type *ty, llvm::Value *ptr, uint32_t idx); -} // namespace CompilerUtils + +namespace bb { +std::string getLabel(const llvm::Function *func); +std::string getLabel(const llvm::BasicBlock *bb); +std::string getLabel(const llvm::Value *v); + +// Returns a concatenated list as string, where each BB label is prefixed by @prefix. In case an empty list is given, +// return @emptyRetValue. +std::string getNamesForBasicBlocks(const llvm::ArrayRef blocks, + llvm::StringRef emptyRetValue = "", llvm::StringRef prefix = " %"); + +// Returns a concatenated list as string, where each BB label is prefixed by @prefix. In case an empty list is given, +// return @emptyRetValue. +std::string getNamesForBasicBlocks(const llvm::SmallSet &blocks, + llvm::StringRef emptyRetValue = "", llvm::StringRef prefix = " %"); +} // namespace bb +} // namespace compilerutils + +// Temporary alias. 
+namespace CompilerUtils = compilerutils; namespace llvm { diff --git a/compilerutils/include/compilerutils/DxilToLlvm.h b/compilerutils/include/compilerutils/DxilToLlvm.h index 346f1d19d9..3984747acb 100644 --- a/compilerutils/include/compilerutils/DxilToLlvm.h +++ b/compilerutils/include/compilerutils/DxilToLlvm.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -66,7 +66,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -namespace CompilerUtils { +namespace compilerutils { class DxilToLlvmPass : public llvm::PassInfoMixin { public: @@ -75,4 +75,4 @@ class DxilToLlvmPass : public llvm::PassInfoMixin { static llvm::StringRef name() { return "Convert DXIL to LLVM IR"; } }; -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/compilerutils/DxilUtils.h b/compilerutils/include/compilerutils/DxilUtils.h index 837859ebf6..efd4a7997f 100644 --- a/compilerutils/include/compilerutils/DxilUtils.h +++ b/compilerutils/include/compilerutils/DxilUtils.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -33,7 +33,7 @@ #include "llvm/ADT/StringRef.h" -namespace CompilerUtils::dxil { +namespace compilerutils::dxil { // Try to demangle function names in the DXIL format: // ...\01?FuncName@@... @@ -61,4 +61,4 @@ inline llvm::StringRef tryDemangleFunctionName(llvm::StringRef inputName) { return inputName.substr(start, end - start); } -} // namespace CompilerUtils::dxil +} // namespace compilerutils::dxil diff --git a/compilerutils/include/compilerutils/IRSerializationUtils.h b/compilerutils/include/compilerutils/IRSerializationUtils.h index b6cda6761c..4eecf01d48 100644 --- a/compilerutils/include/compilerutils/IRSerializationUtils.h +++ b/compilerutils/include/compilerutils/IRSerializationUtils.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -47,11 +47,13 @@ std::string getModuleHashStr(const llvm::Module &m); // FilenamePrefix.FuncName.Hash.dot where FuncName is determined by demangling // the DXIL function name, and Hash is given by getModuleHashStr. // Set cfgOnly = false to include instructions within the BBs. -void writeCFGToDotFile(const llvm::Function &f, llvm::StringRef filenamePrefix = "cfg", bool cfgOnly = true); +void writeCFGToDotFile(const llvm::Function &f); +void writeCFGToDotFile(const llvm::Function &f, llvm::StringRef filenamePrefix, bool cfgOnly = true); // Writes an LL file with the module. The filename is: // FilenamePrefix.Hash.ll where Hash is given by getModuleHashStr. 
-void writeModuleToLLFile(const llvm::Module &m, llvm::StringRef filenamePrefix = "module"); +void writeModuleToLLFile(const llvm::Module &m); +void writeModuleToLLFile(const llvm::Module &m, llvm::StringRef filenamePrefix); } // namespace irserializationutils diff --git a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h index 15fa207471..a4b8578192 100644 --- a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h +++ b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -37,7 +37,7 @@ #include #include -namespace CompilerUtils { +namespace compilerutils { /// @brief A key-value map from pointer keys to tuples of pointers that is optimized for value and type lowering uses /// @@ -252,4 +252,4 @@ template class LoweringPoint } }; -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/compilerutils/TypeLowering.h b/compilerutils/include/compilerutils/TypeLowering.h index 4782a022a2..bf32cc6b19 100644 --- a/compilerutils/include/compilerutils/TypeLowering.h +++ b/compilerutils/include/compilerutils/TypeLowering.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -61,7 +61,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/IRBuilder.h" -namespace CompilerUtils { +namespace compilerutils { class TypeLowering; @@ -129,7 +129,8 @@ using ConstantTypeLoweringFn = llvm::SmallVector(TypeLowering /// payload.lowering.finishPhis(); /// /// // Erase all instructions that "have been replaced" (by calling -/// replaceInstruction for them). payload.lowering.finishCleanup(); +/// replaceInstruction for them). +/// payload.finishCleanup(); /// @endcode class TypeLowering { public: @@ -151,6 +152,7 @@ class TypeLowering { void eraseInstruction(llvm::Instruction *); llvm::Function *lowerFunctionArguments(llvm::Function &); + void lowerGlobalVariable(llvm::GlobalVariable &); void finishPhis(); bool finishCleanup(); @@ -171,16 +173,17 @@ class TypeLowering { llvm::SmallVector> m_constantRules; /// Cache mappings of types (including no-op mappings). - CompilerUtils::LoweringPointerTupleMap m_typeConversions; + compilerutils::LoweringPointerTupleMap m_typeConversions; llvm::IRBuilder<> m_builder; /// Map original values to type-converted values. 
- CompilerUtils::LoweringPointerTupleMap m_valueMap; + compilerutils::LoweringPointerTupleMap m_valueMap; std::vector>> m_phis; std::vector m_instructionsToErase; llvm::SmallVector m_functionsToErase; + llvm::SmallVector m_gvToErase; }; -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/compilerutils/ValueOriginTracking.h b/compilerutils/include/compilerutils/ValueOriginTracking.h index dbc5135ce7..9abbe5de39 100644 --- a/compilerutils/include/compilerutils/ValueOriginTracking.h +++ b/compilerutils/include/compilerutils/ValueOriginTracking.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -94,7 +94,7 @@ class Instruction; class Value; } // namespace llvm -namespace CompilerUtils { +namespace compilerutils { namespace ValueTracking { @@ -312,4 +312,4 @@ class ValueOriginTracker { ValueInfo &getOrComputeValueInfo(llvm::Value *V, bool KnownToBeNew = false); }; -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/compilerutils/ValueSpecialization.h b/compilerutils/include/compilerutils/ValueSpecialization.h index e1ca13c231..8287a8cffa 100644 --- a/compilerutils/include/compilerutils/ValueSpecialization.h +++ b/compilerutils/include/compilerutils/ValueSpecialization.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -61,7 +61,7 @@ class Module; class StringRef; } // namespace llvm -namespace CompilerUtils { +namespace compilerutils { class ValueSpecializer { public: @@ -173,4 +173,4 @@ class ValueSpecializer { llvm::ArrayRef DwordInfos, llvm::StringRef ReplacementName); }; -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/include/public/.clang-format b/compilerutils/include/public/.clang-format new file mode 100644 index 0000000000..e3845288a2 --- /dev/null +++ b/compilerutils/include/public/.clang-format @@ -0,0 +1 @@ +DisableFormat: true diff --git a/compilerutils/include/public/AmdExtD3DShaderIntrinsics.h b/compilerutils/include/public/AmdExtD3DShaderIntrinsics.h new file mode 100644 index 0000000000..1575f077a5 --- /dev/null +++ b/compilerutils/include/public/AmdExtD3DShaderIntrinsics.h @@ -0,0 +1,614 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2016-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +#pragma once + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsInfo structure that defines various support levels by the ASIC +* for shader intrinsics extension. +*********************************************************************************************************************** +*/ +struct AmdExtD3DShaderIntrinsicsInfo +{ + unsigned int waveSize; + unsigned int minWaveSize; + unsigned int maxWaveSize; + unsigned int reserved[5]; +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsics instruction encoding +* [07:00] Instruction opcode (AmdExtD3DShaderIntrinsicsOpcode) +* [23:08] Opcode-specific data +* [25:24] Opcode phase +* [27:26] Reserved +* [31:28] Shader intrinsics magic code (AmdExtD3DShaderIntrinsics_MagicCode) +* +*********************************************************************************************************************** +*/ +const unsigned int AmdExtD3DShaderIntrinsics_MagicCodeShift = 28; +const unsigned int AmdExtD3DShaderIntrinsics_MagicCodeMask = 0xf; +const unsigned int AmdExtD3DShaderIntrinsics_OpcodePhaseShift = 24; +const unsigned int AmdExtD3DShaderIntrinsics_OpcodePhaseMask = 0x3; +const 
unsigned int AmdExtD3DShaderIntrinsics_DataShift = 8; +const unsigned int AmdExtD3DShaderIntrinsics_DataMask = 0xffff; +const unsigned int AmdExtD3DShaderIntrinsics_OpcodeShift = 0; +const unsigned int AmdExtD3DShaderIntrinsics_OpcodeMask = 0xff; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsics_MagicCode +* Used to distinguish 32 bit imm dst in a imm_atomic_cmp_exch as an intrinsic instruction +* +*********************************************************************************************************************** +*/ +const unsigned int AmdExtD3DShaderIntrinsics_MagicCode = 0x5; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsOpcode enumeration +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsOpcode +{ + AmdExtD3DShaderIntrinsicsOpcode_Readfirstlane = 0x1, + AmdExtD3DShaderIntrinsicsOpcode_Readlane = 0x2, + AmdExtD3DShaderIntrinsicsOpcode_LaneId = 0x3, + AmdExtD3DShaderIntrinsicsOpcode_Swizzle = 0x4, + AmdExtD3DShaderIntrinsicsOpcode_Ballot = 0x5, + AmdExtD3DShaderIntrinsicsOpcode_MBCnt = 0x6, + AmdExtD3DShaderIntrinsicsOpcode_Min3U = 0x7, + AmdExtD3DShaderIntrinsicsOpcode_Min3F = 0x8, + AmdExtD3DShaderIntrinsicsOpcode_Med3U = 0x9, + AmdExtD3DShaderIntrinsicsOpcode_Med3F = 0xa, + AmdExtD3DShaderIntrinsicsOpcode_Max3U = 0xb, + AmdExtD3DShaderIntrinsicsOpcode_Max3F = 0xc, + AmdExtD3DShaderIntrinsicsOpcode_BaryCoord = 0xd, + AmdExtD3DShaderIntrinsicsOpcode_VtxParam = 0xe, + AmdExtD3DShaderIntrinsicsOpcode_Reserved1 = 0xf, + AmdExtD3DShaderIntrinsicsOpcode_Reserved2 = 0x10, + AmdExtD3DShaderIntrinsicsOpcode_Reserved3 = 0x11, + AmdExtD3DShaderIntrinsicsOpcode_WaveReduce = 0x12, + AmdExtD3DShaderIntrinsicsOpcode_WaveScan = 0x13, + 
AmdExtD3DShaderIntrinsicsOpcode_LoadDwordAtAddr = 0x14, + AmdExtD3DShaderIntrinsicsOpcode_Reserved4 = 0x15, + AmdExtD3DShaderIntrinsicsOpcode_IntersectInternal = 0x16, + AmdExtD3DShaderIntrinsicsOpcode_DrawIndex = 0x17, + AmdExtD3DShaderIntrinsicsOpcode_AtomicU64 = 0x18, + AmdExtD3DShaderIntrinsicsOpcode_GetWaveSize = 0x19, + AmdExtD3DShaderIntrinsicsOpcode_BaseInstance = 0x1a, + AmdExtD3DShaderIntrinsicsOpcode_BaseVertex = 0x1b, + AmdExtD3DShaderIntrinsicsOpcode_FloatConversion = 0x1c, + AmdExtD3DShaderIntrinsicsOpcode_ReadlaneAt = 0x1d, + AmdExtD3DShaderIntrinsicsOpcode_RayTraceHitToken = 0x1e, + AmdExtD3DShaderIntrinsicsOpcode_ShaderClock = 0x1f, + AmdExtD3DShaderIntrinsicsOpcode_ShaderRealtimeClock = 0x20, + AmdExtD3DShaderIntrinsicsOpcode_Halt = 0x21, + AmdExtD3DShaderIntrinsicsOpcode_IntersectBvhNode = 0x22, + AmdExtD3DShaderIntrinsicsOpcode_BufferStoreByte = 0x23, + AmdExtD3DShaderIntrinsicsOpcode_BufferStoreShort = 0x24, + AmdExtD3DShaderIntrinsicsOpcode_ShaderMarker = 0x25, + AmdExtD3DShaderIntrinsicsOpcode_FloatOpWithRoundMode = 0x26, + AmdExtD3DShaderIntrinsicsOpcode_Reserved5 = 0x27, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixMulAcc = 0x28, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixUavLoad = 0x29, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixUavStore = 0x2a, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixGlobalLoad = 0x2b, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixGlobalStore = 0x2c, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixLdsLoad = 0x2d, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixLdsStore = 0x2e, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixElementFill = 0x2f, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixElementExtract = 0x30, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixLength = 0x31, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixCopy = 0x32, + AmdExtD3DShaderIntrinsicsOpcode_WaveMatrixFill = 0x33, + AmdExtD3DShaderIntrinsicsOpcode_MatrixSparsityIndexLoad = 0x34, + AmdExtD3DShaderIntrinsicsOpcode_MatrixElementWiseArithmetic = 0x35, + 
AmdExtD3DShaderIntrinsicsOpcode_Float8Conversion = 0x36, + AmdExtD3DShaderIntrinsicsOpcode_BuiltIn1 = 0x37, + AmdExtD3DShaderIntrinsicsOpcode_BuiltInArg = 0x38, + AmdExtD3DShaderIntrinsicsOpcode_LastValidOpcode = 0x38, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsOpcodePhase enumeration +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsOpcodePhase +{ + AmdExtD3DShaderIntrinsicsOpcodePhase_0 = 0x0, + AmdExtD3DShaderIntrinsicsOpcodePhase_1 = 0x1, + AmdExtD3DShaderIntrinsicsOpcodePhase_2 = 0x2, + AmdExtD3DShaderIntrinsicsOpcodePhase_3 = 0x3, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsBarycentric enumeration to specify the interpolation mode +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsBarycentric +{ + AmdExtD3DShaderIntrinsicsBarycentric_LinearCenter = 0x1, + AmdExtD3DShaderIntrinsicsBarycentric_LinearCentroid = 0x2, + AmdExtD3DShaderIntrinsicsBarycentric_LinearSample = 0x3, + AmdExtD3DShaderIntrinsicsBarycentric_PerspCenter = 0x4, + AmdExtD3DShaderIntrinsicsBarycentric_PerspCentroid = 0x5, + AmdExtD3DShaderIntrinsicsBarycentric_PerspSample = 0x6, + AmdExtD3DShaderIntrinsicsBarycentric_PerspPullModel = 0x7, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveOp enumeration to specify the wave operation +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveOp +{ +
AmdExtD3DShaderIntrinsicsWaveOp_AddF = 0x01, + AmdExtD3DShaderIntrinsicsWaveOp_AddI = 0x02, + AmdExtD3DShaderIntrinsicsWaveOp_AddU = 0x03, + AmdExtD3DShaderIntrinsicsWaveOp_MulF = 0x04, + AmdExtD3DShaderIntrinsicsWaveOp_MulI = 0x05, + AmdExtD3DShaderIntrinsicsWaveOp_MulU = 0x06, + AmdExtD3DShaderIntrinsicsWaveOp_MinF = 0x07, + AmdExtD3DShaderIntrinsicsWaveOp_MinI = 0x08, + AmdExtD3DShaderIntrinsicsWaveOp_MinU = 0x09, + AmdExtD3DShaderIntrinsicsWaveOp_MaxF = 0x0a, + AmdExtD3DShaderIntrinsicsWaveOp_MaxI = 0x0b, + AmdExtD3DShaderIntrinsicsWaveOp_MaxU = 0x0c, + AmdExtD3DShaderIntrinsicsWaveOp_And = 0x0d, + AmdExtD3DShaderIntrinsicsWaveOp_Or = 0x0e, + AmdExtD3DShaderIntrinsicsWaveOp_Xor = 0x0f, +}; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for WaveOps +*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_OpcodeShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_OpcodeMask = 0xff; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_FlagShift = 8; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_FlagMask = 0xff; + +// Following flags only apply to AmdExtD3DShaderIntrinsicsSupport_WaveScan +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_Inclusive = 0x1; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_Exclusive = 0x2; + +// Following flags only apply to AmdExtD3DShaderIntrinsicsSupport_WaveReduce +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSizeNone = 0x0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize1 = 0x1; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize2 = 0x2; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize4 = 0x3; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize8 = 
0x4; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize16 = 0x5; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize32 = 0x6; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveOp_ClusterSize64 = 0x7; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsSupport enumeration used to check the opcode type supported by the extension. +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsSupport +{ + AmdExtD3DShaderIntrinsicsSupport_Readfirstlane = 0x1, + AmdExtD3DShaderIntrinsicsSupport_Readlane = 0x2, + AmdExtD3DShaderIntrinsicsSupport_LaneId = 0x3, + AmdExtD3DShaderIntrinsicsSupport_Swizzle = 0x4, + AmdExtD3DShaderIntrinsicsSupport_Ballot = 0x5, + AmdExtD3DShaderIntrinsicsSupport_MBCnt = 0x6, + AmdExtD3DShaderIntrinsicsSupport_Compare3 = 0x7, + AmdExtD3DShaderIntrinsicsSupport_Barycentrics = 0x8, + AmdExtD3DShaderIntrinsicsSupport_WaveReduce = 0x9, + AmdExtD3DShaderIntrinsicsSupport_WaveScan = 0xA, + AmdExtD3DShaderIntrinsicsSupport_LoadDwordAtAddr = 0xB, + AmdExtD3DShaderIntrinsicsSupport_Reserved1 = 0xC, + AmdExtD3DShaderIntrinsicsSupport_IntersectInternal = 0xD, + AmdExtD3DShaderIntrinsicsSupport_DrawIndex = 0xE, + AmdExtD3DShaderIntrinsicsSupport_AtomicU64 = 0xF, + AmdExtD3DShaderIntrinsicsSupport_BaseInstance = 0x10, + AmdExtD3DShaderIntrinsicsSupport_BaseVertex = 0x11, + AmdExtD3DShaderIntrinsicsSupport_FloatConversion = 0x12, + AmdExtD3DShaderIntrinsicsSupport_GetWaveSize = 0x13, + AmdExtD3DShaderIntrinsicsSupport_ReadlaneAt = 0x14, + AmdExtD3DShaderIntrinsicsSupport_RayTraceHitToken = 0x15, + AmdExtD3DShaderIntrinsicsSupport_ShaderClock = 0x16, + AmdExtD3DShaderIntrinsicsSupport_ShaderRealtimeClock = 0x17, + AmdExtD3DShaderIntrinsicsSupport_Halt = 0x18, + AmdExtD3DShaderIntrinsicsSupport_IntersectBvhNode = 0x19, +
AmdExtD3DShaderIntrinsicsSupport_BufferStoreByte = 0x1A, + AmdExtD3DShaderIntrinsicsSupport_BufferStoreShort = 0x1B, + AmdExtD3DShaderIntrinsicsSupport_ShaderMarker = 0x1C, + AmdExtD3DShaderIntrinsicsSupport_FloatOpWithRoundMode = 0x1D, + AmdExtD3DShaderIntrinsicsSupport_Reserved2 = 0x1E, + AmdExtD3DShaderIntrinsicsSupport_WaveMatrix = 0x1F, + AmdExtD3DShaderIntrinsicsSupport_Float8Conversion = 0x20, + AmdExtD3DShaderIntrinsicsSupport_Builtins = 0x21, + AmdExtD3DShaderIntrinsicsSupport_ExecutionReordering = 0x22, +}; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required to reference a vertex parameter +*********************************************************************************************************************** +*/ +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_ParamShift = 0; +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_ParamMask = 0x1f; +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_VtxShift = 0x5; +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_VtxMask = 0x3; +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_ComponentShift = 0x7; +const unsigned int AmdExtD3DShaderIntrinsicsBarycentric_ComponentMask = 0x3; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsAtomicOp enumeration to specify the atomic operation +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsAtomicOp +{ + AmdExtD3DShaderIntrinsicsAtomicOp_MinU64 = 0x01, + AmdExtD3DShaderIntrinsicsAtomicOp_MaxU64 = 0x02, + AmdExtD3DShaderIntrinsicsAtomicOp_AndU64 = 0x03, + AmdExtD3DShaderIntrinsicsAtomicOp_OrU64 = 0x04, + AmdExtD3DShaderIntrinsicsAtomicOp_XorU64 = 0x05, + AmdExtD3DShaderIntrinsicsAtomicOp_AddU64 
= 0x06, + AmdExtD3DShaderIntrinsicsAtomicOp_XchgU64 = 0x07, + AmdExtD3DShaderIntrinsicsAtomicOp_CmpXchgU64 = 0x08, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsFloatConversionOp enumeration to specify the type and rounding mode of +* float to float16 conversion +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsFloatConversionOp +{ + AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16Near = 0x01, + AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16NegInf = 0x02, + AmdExtD3DShaderIntrinsicsFloatConversionOp_FToF16PlusInf = 0x03, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode enumeration for supported round modes. +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsFloatOpWithRoundModeMode +{ + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_TiesToEven = 0x0, + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_TowardPositive = 0x1, + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_TowardNegative = 0x2, + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_TowardZero = 0x3, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode enumeration for supported float operations. 
+*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsFloatOpWithRoundModeOp +{ + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_Add = 0x0, + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_Subtract = 0x1, + AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_Multiply = 0x2, +}; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode +*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_ModeShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_ModeMask = 0xf; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_OpShift = 4; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloatOpWithRoundMode_OpMask = 0xf; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsFloat8CvtOp enumeration to specify the conversion operation +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsFloat8CvtOp +{ + AmdExtD3DShaderIntrinsicsFloat8CvtOp_FP8_2_F32 = 0x0, + AmdExtD3DShaderIntrinsicsFloat8CvtOp_BF8_2_F32 = 0x1, + AmdExtD3DShaderIntrinsicsFloat8CvtOp_F32_2_FP8 = 0x2, + AmdExtD3DShaderIntrinsicsFloat8CvtOp_F32_2_BF8 = 0x3, +}; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for AmdExtD3DShaderIntrinsicsFloat8Conversion 
+*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloat8Conversion_CvtOpShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloat8Conversion_CvtOpMask = 0xff; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloat8Conversion_SatShift = 8; +constexpr unsigned int AmdExtD3DShaderIntrinsicsFloat8Conversion_SatMask = 0x1; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat enumeration for supported matrix element data format. +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveMatrixOpDataFormat +{ + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_I4 = 0x0, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_U4 = 0x1, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_I8 = 0x2, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_U8 = 0x3, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_F16 = 0x4, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_BF16 = 0x5, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_F32 = 0x6, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_I32 = 0x7, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_U32 = 0x8, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_BF8 = 0x9, + AmdExtD3DShaderIntrinsicsWaveMatrixDataFormat_FP8 = 0xa, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveMatrixType enumeration for supported wave matrix type. 
+*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveMatrixOpMatrixType +{ + AmdExtD3DShaderIntrinsicsWaveMatrixType_A = 0x0, + AmdExtD3DShaderIntrinsicsWaveMatrixType_B = 0x1, + AmdExtD3DShaderIntrinsicsWaveMatrixType_Accumulator = 0x2, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveMatrixOpMatrixShape enumeration for supported wave matrix shape. +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveMatrixOpMatrixShape +{ + AmdExtD3DShaderIntrinsicsWaveMatrixShape_16X16 = 0x0, + AmdExtD3DShaderIntrinsicsWaveMatrixShape_32X16 = 0x1, + AmdExtD3DShaderIntrinsicsWaveMatrixShape_16X32 = 0x2, + AmdExtD3DShaderIntrinsicsWaveMatrixShape_64X16 = 0x3, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveMatrixOpcode enumeration to specify the wmma inst opcode.
+*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveMatrixOpcode +{ + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_BF16_16X16X16_BF16 = 0x0, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F16_16X16X16_F16 = 0x1, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_BF16 = 0x2, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_BF8_BF8 = 0x3, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_BF8_FP8 = 0x4, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_F16 = 0x5, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_FP8_BF8 = 0x6, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_F32_16X16X16_FP8_FP8 = 0x7, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_I4 = 0x8, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_U4 = 0x9, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_IU4 = 0xa, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_UI4 = 0xb, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_I8 = 0xc, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_U8 = 0xd, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_IU8 = 0xe, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X16_UI8 = 0xf, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X32_I4 = 0x10, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X32_U4 = 0x11, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X32_IU4 = 0x12, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_WMMA_I32_16X16X32_UI4 = 0x13, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_BF16_16X16X32_BF16 = 0x14, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F16_16X16X32_F16 = 0x15, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_BF16 = 0x16, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_BF8_BF8 = 0x17, + 
AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_BF8_FP8 = 0x18, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_F16 = 0x19, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_FP8_BF8 = 0x1a, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_F32_16X16X32_FP8_FP8 = 0x1b, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_I4 = 0x1c, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_U4 = 0x1d, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_IU4 = 0x1e, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_UI4 = 0x1f, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_I8 = 0x20, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_U8 = 0x21, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_IU8 = 0x22, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X32_UI8 = 0x23, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X64_I4 = 0x24, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X64_U4 = 0x25, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X64_IU4 = 0x26, + AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_SWMMA_I32_16X16X64_UI4 = 0x27, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsWaveMatrixRegType enumeration to specify the temp register. 
+*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsWaveMatrixRegType +{ + AmdExtD3DShaderIntrinsicsWaveMatrixRegType_RetVal_Reg = 0x0, + AmdExtD3DShaderIntrinsicsWaveMatrixRegType_A_TempReg = 0x1, + AmdExtD3DShaderIntrinsicsWaveMatrixRegType_B_TempReg = 0x2, + AmdExtD3DShaderIntrinsicsWaveMatrixRegType_Accumulator_TempReg = 0x3, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsMatrixElementWiseOp enumeration to specify the element-wise operation +*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsMatrixElementWiseOp +{ + AmdExtD3DShaderIntrinsicsMatrixElementWiseOp_Add = 0x1, + AmdExtD3DShaderIntrinsicsMatrixElementWiseOp_Sub = 0x2, + AmdExtD3DShaderIntrinsicsMatrixElementWiseOp_Mul = 0x3, + AmdExtD3DShaderIntrinsicsMatrixElementWiseOp_Div = 0x4, + AmdExtD3DShaderIntrinsicsMatrixElementWiseOp_Times = 0x5, +}; + +/** +*********************************************************************************************************************** +* @brief +* AmdExtD3DShaderIntrinsicsSparsityIndexMem enumeration is used to specify where to read sparsity indexes. 
+*********************************************************************************************************************** +*/ +enum AmdExtD3DShaderIntrinsicsSparsityIndexMem +{ + AmdExtD3DShaderIntrinsicsSparsityIndexMem_UavBuffer = 0x0, + AmdExtD3DShaderIntrinsicsSparsityIndexMem_GroupShared = 0x1, + AmdExtD3DShaderIntrinsicsSparsityIndexMem_GlobalBuffer = 0x2, +}; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for AmdExtD3DShaderIntrinsicsWaveMatrixOpcode +*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_OpsShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_OpsMask = 0x7f; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_FlagShift = 15; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixOpcode_FlagMask = 0x1; + +/** +*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for AmdExtD3DShaderIntrinsicsWaveMatrixOpInOut +*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_ChannelShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_ChannelMask = 0xf; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_SecondRegFlagShift = 4; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_SecondRegFlagMask = 0xf; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_MatRegTypeFlagShift = 8; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixInOut_MatRegTypeFlagMask = 0xff; + +/** 
+*********************************************************************************************************************** +* @brief +* Shifts and masks for the arguments required for AmdExtD3DShaderIntrinsicsWaveMatrixModifier +*********************************************************************************************************************** +*/ +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_DataFormatFlagShift = 0; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_DataFormatFlagMask = 0xf; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_MatrixTypeFlagShift = 4; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_MatrixTypeFlagMask = 0x7; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_LayoutFlagShift = 7; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_LayoutFlagMask = 0x1; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_ShapeShift = 8; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_ShapeMask = 0x7; +// Following flags only apply to AmdExtD3DShaderIntrinsicsOpcode_WaveMatrix*Load +// and AmdExtD3DShaderIntrinsicsOpcode_WaveMatrix*Store +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_MatrixTileShift = 11; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_MatrixTileMask = 0x1; +// Following flags only apply to AmdExtD3DShaderIntrinsicsOpcode_MatrixSparsityIndexLoad +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_IndexMemTypeShift = 14; +constexpr unsigned int AmdExtD3DShaderIntrinsicsWaveMatrixModifier_IndexMemTypeMask = 0x3; + +/** +*********************************************************************************************************************** +* @brief +* Creates instruction from supplied opcode and immediate data. 
+* +*********************************************************************************************************************** +*/ +constexpr unsigned int MakeAmdShaderIntrinsicsInstruction( + unsigned int opcode, + unsigned int opcodePhase, + unsigned int immediateData) +{ + return (AmdExtD3DShaderIntrinsics_MagicCode << AmdExtD3DShaderIntrinsics_MagicCodeShift) | + ((immediateData & AmdExtD3DShaderIntrinsics_DataMask) << AmdExtD3DShaderIntrinsics_DataShift) | + ((opcodePhase & AmdExtD3DShaderIntrinsics_OpcodePhaseMask) << AmdExtD3DShaderIntrinsics_OpcodePhaseShift) | + ((opcode & AmdExtD3DShaderIntrinsics_OpcodeMask) << AmdExtD3DShaderIntrinsics_OpcodeShift); +} + +/** +*********************************************************************************************************************** +* @brief +* Helper struct to unpack shader intrinsic instructions. +* +*********************************************************************************************************************** +*/ +struct UnpackedAmdShaderIntrinsicsInstruction { + unsigned int Magic; + unsigned int Opcode; + unsigned int Phase; + unsigned int ImmediateData; + + static constexpr UnpackedAmdShaderIntrinsicsInstruction Unpack(unsigned int packedInst) + { + return UnpackedAmdShaderIntrinsicsInstruction + { + (packedInst >> AmdExtD3DShaderIntrinsics_MagicCodeShift) & AmdExtD3DShaderIntrinsics_MagicCodeMask, + (packedInst >> AmdExtD3DShaderIntrinsics_OpcodeShift) & AmdExtD3DShaderIntrinsics_OpcodeMask, + (packedInst >> AmdExtD3DShaderIntrinsics_OpcodePhaseShift) & AmdExtD3DShaderIntrinsics_OpcodePhaseMask, + (packedInst >> AmdExtD3DShaderIntrinsics_DataShift) & AmdExtD3DShaderIntrinsics_DataMask, + }; + } + + constexpr bool IsValid() const + { + return Magic == AmdExtD3DShaderIntrinsics_MagicCode; + } +}; + +// AMD shader intrinsics designated SpaceId. Denotes Texture3D resource and static sampler used in conjunction with +// intrinsic instructions. 
+// Applications should use this value for D3D12_ROOT_DESCRIPTOR::RegisterSpace and +// D3D12_STATIC_SAMPLER_DESC::RegisterSpace when creating root descriptor entries for shader intrinsic Texture3D and +// static sampler. +// NOTE: D3D reserves SpaceIds in range 0xFFFFFFF0 - 0xFFFFFFFF +const unsigned int AmdExtD3DShaderIntrinsicsSpaceId = 0x7FFF0ADE; // 2147420894 diff --git a/compilerutils/lib/ArgPromotion.cpp b/compilerutils/lib/ArgPromotion.cpp index a8bd2b06e7..9821df54ee 100644 --- a/compilerutils/lib/ArgPromotion.cpp +++ b/compilerutils/lib/ArgPromotion.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -35,7 +35,7 @@ using namespace llvm; static Function *cloneFunctionHeaderWithTypes(Function &F, TypedFuncTy NewType, AttributeList FnAttr) { FunctionType *FuncTy = NewType.asFunctionType(); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(F, FuncTy, FnAttr); + Function *NewFunc = compilerutils::cloneFunctionHeader(F, FuncTy, FnAttr); NewType.writeMetadata(NewFunc); return NewFunc; } @@ -55,7 +55,7 @@ static Function *cloneFunctionWithTypes(Function *Fn, TypedFuncTy NewFnTy, Attri /// Unpack the return (struct) type of the input function, which means change /// the return type to its first element type. This may generate invalid IR in /// general, call this with extra caution. 
-Function *CompilerUtils::unpackStructReturnType(Function *Fn) { +Function *compilerutils::unpackStructReturnType(Function *Fn) { auto *RetTy = Fn->getReturnType(); assert(RetTy->isStructTy()); auto *NewRetTy = RetTy->getStructElementType(0); @@ -87,7 +87,7 @@ Function *CompilerUtils::unpackStructReturnType(Function *Fn) { } // Turn `StructRet` argument into more canonical return statement. -Function *CompilerUtils::lowerStructRetArgument(Function *Fn) { +Function *compilerutils::lowerStructRetArgument(Function *Fn) { assert(Fn->getReturnType()->isVoidTy()); auto *RetArg = Fn->getArg(0); if (!RetArg->hasStructRetAttr()) @@ -156,7 +156,7 @@ Function *CompilerUtils::lowerStructRetArgument(Function *Fn) { /// Promote pointer argument type to its value type if the corresponding bit in /// `PromotionMask` is being set. -Function *CompilerUtils::promotePointerArguments(Function *Fn, const SmallBitVector &PromotionMask) { +Function *compilerutils::promotePointerArguments(Function *Fn, const SmallBitVector &PromotionMask) { SmallVector ArgTys; SmallVector ParamAttrs; diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index 80ebc4be12..49206ff883 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -27,13 +27,13 @@ #include "ValueOriginTrackingTestPass.h" #include "ValueSpecializationTestPass.h" #include "compilerutils/DxilToLlvm.h" -#include "compilerutils/DxilUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Passes/PassBuilder.h" @@ -45,7 +45,7 @@ #define DEBUG_TYPE "compilerutils" using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; // Whether this is a load instruction that should translate to a last_use // load. @@ -60,7 +60,7 @@ static constexpr const char *MDIsLastUseName = "amdgpu.last.use"; // @param args : Arguments to pass to the callee // @param attribs : Function attributes // @param instName : Name to give instruction -CallInst *CompilerUtils::createNamedCall(IRBuilder<> &builder, StringRef funcName, Type *retTy, ArrayRef args, +CallInst *compilerutils::createNamedCall(IRBuilder<> &builder, StringRef funcName, Type *retTy, ArrayRef args, ArrayRef attribs, const Twine &instName) { assert(!funcName.empty()); Module *mod = builder.GetInsertBlock()->getParent()->getParent(); @@ -105,7 +105,7 @@ CallInst *CompilerUtils::createNamedCall(IRBuilder<> &builder, StringRef funcNam // Modify the function argument types, and return the new function. NOTE: the // function does not do any uses replacement, so the caller should call // replaceAllUsesWith() for the function and arguments afterwards. 
-Function *CompilerUtils::mutateFunctionArguments(Function &fn, Type *retTy, const ArrayRef argTys, +Function *compilerutils::mutateFunctionArguments(Function &fn, Type *retTy, const ArrayRef argTys, AttributeList attributes) { FunctionType *newFnTy = FunctionType::get(retTy, argTys, false); auto *newFn = cloneFunctionHeader(fn, newFnTy, attributes); @@ -114,7 +114,7 @@ Function *CompilerUtils::mutateFunctionArguments(Function &fn, Type *retTy, cons return newFn; } -Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, AttributeList attributes, +Function *compilerutils::cloneFunctionHeader(Function &f, FunctionType *newType, AttributeList attributes, Module *targetModule) { LLVM_DEBUG(dbgs() << "Cloning function " << f.getName() << " with new type " << *newType << "\n"); Function *newFunc = Function::Create(newType, f.getLinkage(), "", targetModule); @@ -136,7 +136,7 @@ Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, return newFunc; } -Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, ArrayRef argAttrs, +Function *compilerutils::cloneFunctionHeader(Function &f, FunctionType *newType, ArrayRef argAttrs, Module *targetModule) { const AttributeList fAttrs = f.getAttributes(); const AttributeList attributes = @@ -144,7 +144,7 @@ Function *CompilerUtils::cloneFunctionHeader(Function &f, FunctionType *newType, return cloneFunctionHeader(f, newType, attributes, targetModule); } -void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) { +void compilerutils::createUnreachable(llvm::IRBuilder<> &b) { auto *unreachable = b.CreateUnreachable(); auto it = ++unreachable->getIterator(); auto *bb = unreachable->getParent(); @@ -158,7 +158,7 @@ void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) { DeleteDeadBlock(oldCode); } -void CompilerUtils::setIsLastUseLoad(llvm::LoadInst &Load) { +void compilerutils::setIsLastUseLoad(llvm::LoadInst &Load) { Load.setMetadata(MDIsLastUseName, 
MDTuple::get(Load.getContext(), {})); } @@ -169,7 +169,7 @@ void CompilerUtils::setIsLastUseLoad(llvm::LoadInst &Load) { // @param function: The Function to modify // @param builder: An IRBuilder instance used for inserting new instructions // @param blockName: The name to give to the new unified return block -llvm::ReturnInst *CompilerUtils::unifyReturns(Function &function, llvm::IRBuilder<> &builder, const Twine &blockName) { +llvm::ReturnInst *compilerutils::unifyReturns(Function &function, llvm::IRBuilder<> &builder, const Twine &blockName) { SmallVector retInsts; for (BasicBlock &block : function) { @@ -321,7 +321,7 @@ CrossModuleInliner &CrossModuleInliner::operator=(CrossModuleInliner &&inliner) CrossModuleInliner::~CrossModuleInliner() = default; -iterator_range CompilerUtils::CrossModuleInliner::inlineCall(CallBase &cb) { +iterator_range compilerutils::CrossModuleInliner::inlineCall(CallBase &cb) { auto *calleeFunc = cb.getCalledFunction(); assert(calleeFunc && "Cannot find called function"); checkTargetModule(*cb.getFunction()->getParent()); @@ -402,8 +402,8 @@ iterator_range CompilerUtils::CrossModuleInliner::inlineCall return make_range(firstNewBb, lastNewBb); } -CompilerUtils::CrossModuleInlinerResult -CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *callee, +compilerutils::CrossModuleInlinerResult +compilerutils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *callee, llvm::ArrayRef args) { auto *call = b.CreateCall(callee, args); // Create a fake use, so we can get the result of the inlined function. 
@@ -441,7 +441,7 @@ CompilerUtils::CrossModuleInliner::inlineCall(IRBuilder<> &b, llvm::Function *ca return {result, newBBs}; } -GlobalValue *CompilerUtils::CrossModuleInliner::findCopiedGlobal(GlobalValue &sourceGv, Module &targetModule) { +GlobalValue *compilerutils::CrossModuleInliner::findCopiedGlobal(GlobalValue &sourceGv, Module &targetModule) { checkTargetModule(targetModule); if (auto found = impl->map.find(&sourceGv); found != impl->map.end()) { @@ -461,7 +461,7 @@ llvm::GlobalValue &CrossModuleInliner::defaultGetGlobalInModuleFunc(CrossModuleI assert(inliner.impl && "Called GetGlobalInModule, but the inliner is currently not inlining anything"); // Try to find by name - if (auto *existing = targetModule.getNamedValue(CompilerUtils::CrossModuleInliner::getCrossModuleName(sourceGv))) + if (auto *existing = targetModule.getNamedValue(compilerutils::CrossModuleInliner::getCrossModuleName(sourceGv))) return *existing; auto &mappedTypes = inliner.impl->typeRemapper.mappedTypes; @@ -492,7 +492,7 @@ llvm::GlobalValue &CrossModuleInliner::defaultGetGlobalInModuleFunc(CrossModuleI // Create a function declaration FunctionType *targetFuncTy = FunctionType::get(mappedTy, params, sourceFuncTy->isVarArg()); - auto *newGv = CompilerUtils::cloneFunctionHeader(*callee, targetFuncTy, callee->getAttributes(), &targetModule); + auto *newGv = compilerutils::cloneFunctionHeader(*callee, targetFuncTy, callee->getAttributes(), &targetModule); newGv->setName(newName); return *newGv; } @@ -534,7 +534,7 @@ void CrossModuleInliner::checkTargetModule(llvm::Module &targetModule) { assert(impl->targetMod == &targetModule); } -void CompilerUtils::replaceAllPointerUses(Value *oldPointerValue, Value *newPointerValue, +void compilerutils::replaceAllPointerUses(Value *oldPointerValue, Value *newPointerValue, SmallVectorImpl &toBeRemoved) { // Note: The implementation explicitly supports typed pointers, which // complicates some of the code below. 
@@ -684,20 +684,82 @@ void CompilerUtils::replaceAllPointerUses(Value *oldPointerValue, Value *newPoin #endif } -Value *CompilerUtils::simplifyingCreateConstGEP1_32(IRBuilder<> &builder, Type *ty, Value *ptr, uint32_t idx) { +Value *compilerutils::simplifyingCreateConstGEP1_32(IRBuilder<> &builder, Type *ty, Value *ptr, uint32_t idx) { // A GEP with a single zero index is redundant with opaque pointers if (idx == 0) return ptr; return builder.CreateConstGEP1_32(ty, ptr, idx); } -Value *CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(IRBuilder<> &builder, Type *ty, Value *ptr, uint32_t idx) { +Value *compilerutils::simplifyingCreateConstInBoundsGEP1_32(IRBuilder<> &builder, Type *ty, Value *ptr, uint32_t idx) { if (idx == 0) return ptr; return builder.CreateConstInBoundsGEP1_32(ty, ptr, idx); } -void CompilerUtils::RegisterPasses(llvm::PassBuilder &PB) { +std::string compilerutils::bb::getLabel(const Function *func) { + if (func->hasName()) + return func->getName().str(); + + ModuleSlotTracker mst(func->getParent()); + mst.incorporateFunction(*func); + + return std::to_string(mst.getLocalSlot(func)); +} + +std::string compilerutils::bb::getLabel(const BasicBlock *bb) { + if (bb->hasName()) + return bb->getName().str(); + + const Function *func = bb->getParent(); + + ModuleSlotTracker mst(func->getParent()); + mst.incorporateFunction(*func); + + return std::to_string(mst.getLocalSlot(bb)); +} + +std::string compilerutils::bb::getLabel(const Value *v) { + if (v->hasName()) + return v->getName().str(); + + if (!isa(v)) + return ""; + + const BasicBlock *bb = dyn_cast(v)->getParent(); + const Function *func = bb->getParent(); + + ModuleSlotTracker mst(func->getParent()); + mst.incorporateFunction(*func); + + return std::to_string(mst.getLocalSlot(v)); +} + +std::string compilerutils::bb::getNamesForBasicBlocks(const ArrayRef blocks, StringRef emptyRetValue, + StringRef prefix) { + std::string s; + if (blocks.empty()) + return emptyRetValue.str(); + + for (auto *bb 
: blocks) + s += prefix.str() + getLabel(bb); + + return s; +} + +std::string compilerutils::bb::getNamesForBasicBlocks(const SmallSet &blocks, StringRef emptyRetValue, + StringRef prefix) { + std::string s; + if (blocks.empty()) + return emptyRetValue.str(); + + for (auto *bb : blocks) + s += prefix.str() + getLabel(bb); + + return s; +} + +void compilerutils::RegisterPasses(llvm::PassBuilder &PB) { #define HANDLE_PASS(NAME, CREATE_PASS) \ if (innerPipeline.empty() && name == NAME) { \ passMgr.addPass(CREATE_PASS); \ diff --git a/compilerutils/lib/DxilToLlvm.cpp b/compilerutils/lib/DxilToLlvm.cpp index 0ee0c77d62..1c824456b8 100644 --- a/compilerutils/lib/DxilToLlvm.cpp +++ b/compilerutils/lib/DxilToLlvm.cpp @@ -3,7 +3,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -53,7 +53,7 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; #define DEBUG_TYPE "dxil-to-llvm" @@ -330,11 +330,49 @@ struct DxilToLlvmPassImpl { m_typeLower.eraseInstruction(&gepInst); } + void visitCallInst(llvm::CallInst &callInst) { + // Changing return types is currently not supported. + // TypeLowering's lowerFunctionArguments asserts this as well. + assert(callInst.getType() == getConvertedType(callInst.getType())); + + // The operands of createHandleForLib may come from a load that was replaced. 
+ if (!isa_and_nonnull(callInst.getCalledOperand()) || + !callInst.getCalledOperand()->getName().starts_with("dx.op.createHandleForLib")) + return; + + // Find converted args + auto opcode = callInst.getArgOperand(0); + auto resource = callInst.getArgOperand(1); + auto convertedResource = m_typeLower.getValueOptional(resource); + if (convertedResource.empty()) + return; + + assert(convertedResource.size() == 1); + Value *callArgs[] = {opcode, convertedResource[0]}; + + // Replacement of function uses has already occurred via lowerFunctionArguments. + auto *newFn = cast(callInst.getCalledOperand()); + + // Create a new call instruction to the new function + CallInst *newCallInst = CallInst::Create(newFn, callArgs, "", callInst.getIterator()); + newCallInst->setCallingConv(callInst.getCallingConv()); + newCallInst->setTailCallKind(callInst.getTailCallKind()); + newCallInst->setAttributes(callInst.getAttributes()); + // Replace uses of the old call with the new call + callInst.replaceAllUsesWith(newCallInst); + m_typeLower.eraseInstruction(&callInst); + } + void fixFunctionTypes() { for (Function &function : m_module) m_typeLower.lowerFunctionArguments(function); } + void fixGlobalVariables() { + for (GlobalVariable &gv : m_module.globals()) + m_typeLower.lowerGlobalVariable(gv); + } + llvm::PreservedAnalyses run() { m_typeLower.addRule(convertVectorType); @@ -344,8 +382,10 @@ struct DxilToLlvmPassImpl { .add(&DxilToLlvmPassImpl::visitExtractElement) .add(&DxilToLlvmPassImpl::visitShuffleVector) .add(&DxilToLlvmPassImpl::visitGEP) + .add(&DxilToLlvmPassImpl::visitCallInst) .build(); fixFunctionTypes(); + fixGlobalVariables(); visitor.visit(*this, m_module); @@ -367,7 +407,7 @@ template <> struct llvm_dialects::VisitorPayloadProjectiongetArg(argIdx)}); @@ -177,6 +177,33 @@ Function *TypeLowering::lowerFunctionArguments(Function &fn) { return newFn; } +// 
===================================================================================================================== +// Lower global variable based on the registered rules. +// +// @param oldGV : the global variable to lower; replaced and queued for erasure if its value type converts +void TypeLowering::lowerGlobalVariable(llvm::GlobalVariable &oldGV) { + auto convertedTy = convertType(oldGV.getValueType()); + + // Do not replace the global if its type hasn't changed + if (convertedTy.size() == 1 && convertedTy[0] == oldGV.getValueType()) + return; + + assert(convertedTy.size() == 1 && "Only 1:1 type remapping supported now"); + + GlobalVariable *newGV = + new GlobalVariable(*oldGV.getParent(), convertedTy[0], oldGV.isConstant(), oldGV.getLinkage(), + oldGV.hasInitializer() ? oldGV.getInitializer() : nullptr, oldGV.getName(), &oldGV); + + // Copy attributes from the old global variable + newGV->copyAttributesFrom(&oldGV); + + // Replace uses of oldGV with newGV + oldGV.replaceAllUsesWith(newGV); + + // Queue the old global variable; it is erased later in finishCleanup() + m_gvToErase.push_back(&oldGV); +} + // ===================================================================================================================== // Add a type conversion rule. // @@ -429,6 +456,10 @@ bool TypeLowering::finishCleanup() { fn->eraseFromParent(); m_functionsToErase.clear(); + for (GlobalVariable *gv : m_gvToErase) + gv->eraseFromParent(); + m_gvToErase.clear(); + m_valueMap.clear(); return changed; diff --git a/compilerutils/lib/ValueOriginTracking.cpp b/compilerutils/lib/ValueOriginTracking.cpp index 3ed550f2c3..3caa1c7a77 100644 --- a/compilerutils/lib/ValueOriginTracking.cpp +++ b/compilerutils/lib/ValueOriginTracking.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -34,11 +34,10 @@ #define DEBUG_TYPE "value-origin-tracking" -using namespace CompilerUtils; -using namespace CompilerUtils::ValueTracking; +using namespace compilerutils::ValueTracking; using namespace llvm; -namespace CompilerUtils { +namespace compilerutils { namespace { @@ -472,7 +471,7 @@ void SliceInfo::print(llvm::raw_ostream &OS, bool Compact) const { OS << ")"; } -llvm::raw_ostream &CompilerUtils::ValueTracking::operator<<(llvm::raw_ostream &OS, const SliceInfo &SI) { +llvm::raw_ostream &compilerutils::ValueTracking::operator<<(llvm::raw_ostream &OS, const SliceInfo &SI) { SI.print(OS); return OS; } @@ -491,7 +490,7 @@ void ValueTracking::ValueInfo::print(llvm::raw_ostream &OS, bool Compact) const } } -llvm::raw_ostream &CompilerUtils::ValueTracking::operator<<(llvm::raw_ostream &OS, const ValueInfo &VI) { +llvm::raw_ostream &compilerutils::ValueTracking::operator<<(llvm::raw_ostream &OS, const ValueInfo &VI) { VI.print(OS); return OS; } @@ -848,4 +847,4 @@ void ValueOriginTracker::analyzeValues(ArrayRef Values) { } } -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/lib/ValueOriginTrackingTestPass.cpp b/compilerutils/lib/ValueOriginTrackingTestPass.cpp index 0b957f9b96..bb80cf94a5 100644 --- a/compilerutils/lib/ValueOriginTrackingTestPass.cpp +++ b/compilerutils/lib/ValueOriginTrackingTestPass.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -33,7 +33,7 @@ #include using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; namespace { @@ -95,7 +95,7 @@ ValueOriginTracker::ValueOriginAssumptions parseAssumptions(Module &Module, Func } // namespace -namespace CompilerUtils { +namespace compilerutils { llvm::PreservedAnalyses ValueOriginTrackingTestPass::run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager) { @@ -170,4 +170,4 @@ llvm::PreservedAnalyses ValueOriginTrackingTestPass::run(llvm::Module &Module, return PreservedAnalyses::all(); } -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/lib/ValueOriginTrackingTestPass.h b/compilerutils/lib/ValueOriginTrackingTestPass.h index 3505e0c085..8d20600eae 100644 --- a/compilerutils/lib/ValueOriginTrackingTestPass.h +++ b/compilerutils/lib/ValueOriginTrackingTestPass.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -28,7 +28,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -namespace CompilerUtils { +namespace compilerutils { // Helper pass to enable lit tests of ValueOriginTracker. // Calls to a function called "analyze" triggers an analysis its arguments and outputs the analysis to stdout. 
@@ -39,4 +39,4 @@ class ValueOriginTrackingTestPass : public llvm::PassInfoMixin using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; namespace { @@ -88,7 +88,7 @@ ValueSpecializationInfo parseSpecializeCall(llvm::CallInst &CI) { } // namespace -namespace CompilerUtils { +namespace compilerutils { llvm::PreservedAnalyses ValueSpecializationTestPass::run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager) { @@ -177,4 +177,4 @@ llvm::PreservedAnalyses ValueSpecializationTestPass::run(llvm::Module &Module, return PreservedAnalyses::none(); } -} // namespace CompilerUtils +} // namespace compilerutils diff --git a/compilerutils/lib/ValueSpecializationTestPass.h b/compilerutils/lib/ValueSpecializationTestPass.h index 20465412ef..d9f5c1fec6 100644 --- a/compilerutils/lib/ValueSpecializationTestPass.h +++ b/compilerutils/lib/ValueSpecializationTestPass.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -28,7 +28,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -namespace CompilerUtils { +namespace compilerutils { // Helper pass to enable lit tests of ValueSpecializer. // Calls to a function named "specialize" trigger value specialization. 
@@ -39,4 +39,4 @@ class ValueSpecializationTestPass : public llvm::PassInfoMixingetFunction(linkName); diff --git a/docs/DdnDebugPrintf.md b/docs/DdnDebugPrintf.md index 5fea0b6feb..55e983dbb6 100644 --- a/docs/DdnDebugPrintf.md +++ b/docs/DdnDebugPrintf.md @@ -1,7 +1,9 @@ -# Support of NonSemantics.DebugPrintf +# Support of DebugPrintf ## Summary +### NonSemantics.DebugPrintf + The `NonSemantics.DebugPrintf` extended instruction enables `printf`-style functionality in SPIR-V modules. @@ -13,6 +15,12 @@ Vulkan layers. Our internal print debug capability allows us to use `printf` in internal SPIR-V applications that are head-scratching hard to debug. +### Shader Abort (OpAbortKHR) + +The new instruction `OpAbortKHR` from the Vulkan extension `VK_KHR_shader_abort` allows a shader to +abort and trigger device loss while passing a `Message` through the API, which +depends on the `debugPrintf` implementation to retrieve the data. + ## Interfaces This section describes interfaces between major components: @@ -69,8 +77,18 @@ The `amdpal.format_strings` map contains: passed into `DebugPrintf` * An element with key `.64bit_arguments` whose value is an array of 64-bit integers that serve as a bitmask of arguments that are 64-bit sized - -An example in JSON-ified form: + * An element with key `.is_abort_message` whose value indicates whether the string comes + from debug printf or from the shader abort function. + * An element with key `.data_count` whose value is the total number of 32-bit words + in an abort message + * An element with key `.constant_status` whose value is an array of 64-bit words that + are concatenated to form a bit vector indicating which of the 32-bit words in the + abort message are constant. + * An element with key `.constant` whose value is an array of 32-bit integer constants + in the order in which they appear in the abort message. The length of this array is + equal to the number of set bits in the `.constant_status` bit vector. 
+ +An example in JSON-ified form of debug printf: ```json { "amdpal.version": [ .. ], @@ -93,6 +111,33 @@ An example in JSON-ified form: ] } } + +``` +An example in JSON-ified form of shader abort: +```json +{ + "amdpal.version": [ .. ], + "amdpal.pipelines": [ .. ], + "amdpal.format_strings": { + ".version": 1, + ".strings": [ + { + ".index": 12345678, + ".is_abort_message": 1, + ".data_count": 2, + ".constant_status": [0x3], + ".constants": [0x0000000074736574, 0x000000006C25203A] + }, + { + ".index": 34567890, + ".is_abort_message": 1, + ".data_count": 2, + ".constant_status": [0x1], + ".constants": [0x0000000074736574] + }, + ] + } +} ``` ### Printf buffer @@ -168,6 +213,11 @@ Most of the implementation is in xgl/icd/api/debug_printf.cpp. > **Note/Todo:** At time of writing, the implementation is limited to handling > only a single pipeline at a time. +#### TDR solution + +To maintain data coherence between the CPU and GPU during a TDR event, allocate the +printf buffer in a shared memory region that is cacheable for the CPU but uncached for the GPU. + ### LLPC #### lgc: debug printf operation @@ -188,8 +238,21 @@ Allowed types for the variable argument operands are: * `i8, i16, i32, i64, half, float, double` * Vectors of the above +#### lgc: shader abort message operation + +The `@lgc.abort.msg` operation represents a printf and abort call for debug purposes. +It is declared as follows: +```llvm +; Function Attrs: nodivergencesource nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @lgc.abort.msg(...) +``` +Allowed types for the variable argument operands are: + +* Vectors of i32 values + #### SPIRVReader +##### lgc.debug.printf The SPIRVReader emits the format string as a global variable in the constant address space and produces a call to `@lgc.debug.printf`. @@ -202,14 +265,27 @@ Example: ... ``` +##### lgc.abort.msg +The SPIRVReader emits the message data as arguments and produces a call to `@lgc.abort.msg`. + +Example: +```llvm + ... 
+ // string: "test: %lf %d \n" + call void (...) @lgc.abort.msg(i32 1953719668, i32 1814372410, i32 1680154726, i32 10, i32 undef, i32 0, i32 1, i32 undef) + ... +``` + **Note:** Builder record/replay is not used. #### lgc::LowerDebugPrintf The module pass `LowerDebugPrintf` runs just before `MutateEntryPoint`. -It collects all calls to `@lgc.debug.printf` in the entire module and: +It collects all calls to `@lgc.debug.printf` or `@lgc.abort.msg` in the entire module and: -* Collects the format strings and adds the `amdpal.format_strings` entry to the +* For `@lgc.debug.printf`, collects the format strings and adds the `amdpal.format_strings` entry to the + PAL metadata document. +* For `@lgc.abort.msg`, collects the constant information and adds the `amdpal.format_strings` entry to the PAL metadata document. * Lowers the calls to the required lower-level instructions. diff --git a/imported/llvm-dialects b/imported/llvm-dialects index 50e4ca3a5c..50260f8bdd 160000 --- a/imported/llvm-dialects +++ b/imported/llvm-dialects @@ -1 +1 @@ -Subproject commit 50e4ca3a5c365b0bde36b122cc34256406723049 +Subproject commit 50260f8bdd9ce47b388f5009546a438aba8b9d16 diff --git a/include/vkgcBase.h b/include/vkgcBase.h index 7c5561f143..31c702db41 100644 --- a/include/vkgcBase.h +++ b/include/vkgcBase.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -62,7 +62,9 @@ struct RtIpVersion { // RT IP checkers bool operator==(const RtIpVersion &rhs) const { return std::tie(major, minor) == std::tie(rhs.major, rhs.minor); } + bool operator!=(const RtIpVersion &rhs) const { return !(*this == rhs); } bool operator>=(const RtIpVersion &rhs) const { return std::tie(major, minor) >= std::tie(rhs.major, rhs.minor); } + bool operator<(const RtIpVersion &rhs) const { return std::tie(major, minor) < std::tie(rhs.major, rhs.minor); } bool isRtIp(unsigned rhsMajor, unsigned rhsMinor) const { return std::tie(major, minor) == std::tie(rhsMajor, rhsMinor); } diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 6f908b7d29..324baec4c4 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -515,6 +515,7 @@ struct PipelineOptions { bool emulateWideLineStipple; ///< For OGL only, enable line AA stipple. bool enablePointSmooth; ///< For OGL only, enable point smooth mode. bool enableRemapLocation; ///< For OGL only, enables location remapping. + bool enableDepthCompareParam; ///< For OGL only, enable depth compare param descriptor. } glState; const auto &getGlState() const { return glState; } @@ -526,6 +527,7 @@ struct PipelineOptions { ///< eliminated if the write value is 1.0. CompileConstInfo *compileConstInfo; ///< Compile time constant data. unsigned reserved22; + bool padBufferSizeToNextDword; ///< Vulkan only, set if the driver rounds the buffer size up to the next dword }; /// Prototype of allocator for output data buffer, used in shader-specific operations. @@ -696,6 +698,13 @@ enum class WaveBreakSize : unsigned { _32x32 = 0x3, ///< Outside a 32x32 pixel region }; +/// Enumerates LLVM instruction scheduling strategies. 
+enum class LlvmScheduleStrategy : unsigned { + None = 0, + MaxMemoryClause = 1, // Maximize memory clause + MaxIlp = 2 // Maximize ILP +}; + /// Enumerates various sizing options of subgroup size for NGG primitive shader. enum class NggSubgroupSizingType : unsigned { Auto, ///< Subgroup size is allocated as optimally determined @@ -933,6 +942,9 @@ struct PipelineShaderOptions { /// Force scope for memory barrier (0 - do not force, nonzero - value of Scope enumeration from SPIR-V headers with /// the exception of CrossDevice that cannot be set at all). unsigned forceMemoryBarrierScope; + + /// Choose llvm's instruction scheduling strategy. + LlvmScheduleStrategy scheduleStrategy; }; /// Represents YCbCr sampler meta data in resource descriptor diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index 79e2021fd5..e8540aff0b 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -54,14 +54,11 @@ add_llvm_library(LLVMlgc LINK_COMPONENTS ) llvm_map_components_to_libnames(extra_llvm_libs CompilerUtils Raytracing) -target_link_libraries(LLVMlgc PUBLIC llvm_dialects ${extra_llvm_libs} llpc_version) - -### Cached Project Options ############################################################################################# -option(LLPC_ENABLE_WERROR "Build LLPC with more errors" OFF) +target_link_libraries(LLVMlgc PUBLIC llvm_dialects ${extra_llvm_libs} sharedme_xdl llpc_version) ### Compiler Options ################################################################################################### include(../cmake/CompilerFlags.cmake) -set_compiler_options(LLVMlgc ${LLPC_ENABLE_WERROR}) +set_compiler_options(LLVMlgc) ### TableGen for LGC dialect ########################################################################################### @@ -71,12 +68,8 @@ if (${LLVM_MAIN_REVISION} GREATER_EQUAL 514862) set(LGC_TABLEGEN_FLAGS -DLLVM_HAVE_NODIVERGENCESOURCE_ATTR) endif() -if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) - set(LGC_TABLEGEN_EXE 
${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) -else() - set(LGC_TABLEGEN_EXE $) -endif() -set(LGC_TABLEGEN_TARGET llvm-dialects-tblgen) +include(../cmake/DialectsTablegen.cmake) +set_dialects_tablegen_exe(LGC) set(LLVM_TARGET_DEFINITIONS interface/lgc/LgcDialect.td) if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/interface/lgc) @@ -195,7 +188,7 @@ target_sources(LLVMlgc PRIVATE lowering/VertexFetch.cpp lowering/CollectImageOperations.cpp lowering/RegisterMetadataBuilder.cpp -#if LLPC_BUILD_STRIX1 +#if LLPC_BUILD_STRIX1 || LLPC_BUILD_STRIX_HALO lowering/WorkaroundDsSubdwordWrite.cpp #endif lowering/CombineCooperativeMatrix.cpp @@ -287,6 +280,7 @@ target_sources(LLVMlgc PRIVATE util/RegStackUsage.cpp util/StartStopTimer.cpp util/WorkgroupLayout.cpp + util/BufferResource.cpp ) # include/lgc/util diff --git a/lgc/builder/BuilderBase.cpp b/lgc/builder/BuilderBase.cpp index 09300d1233..3d7df77058 100644 --- a/lgc/builder/BuilderBase.cpp +++ b/lgc/builder/BuilderBase.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -71,7 +71,7 @@ Value *BuilderCommon::CreatePtrDiff(Type *ty, Value *lhs, Value *rhs, const Twin // @param instName : Name to give instruction CallInst *BuilderCommon::CreateNamedCall(StringRef funcName, Type *retTy, ArrayRef args, ArrayRef attribs, const Twine &instName) { - return CompilerUtils::createNamedCall(*this, funcName, retTy, args, attribs, instName); + return compilerutils::createNamedCall(*this, funcName, retTy, args, attribs, instName); } // Create an llvm.assume call to annotate the dereferenceable and alignment attributes of the pointer. 
We only insert diff --git a/lgc/builder/BuilderImpl.cpp b/lgc/builder/BuilderImpl.cpp index febd09669c..e96d2d7cd0 100644 --- a/lgc/builder/BuilderImpl.cpp +++ b/lgc/builder/BuilderImpl.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -74,35 +74,6 @@ Type *BuilderBase::getConditionallyVectorizedTy(Type *elementTy, Type *maybeVecT // @param vector2 : The float vector 2 // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateDotProduct(Value *const vector1, Value *const vector2, const Twine &instName) { - if (vector1->getType()->getScalarType()->isBFloatTy()) { - assert(getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11); - // Note: v_dot2_bf16_bf16 only respects RTE mode according to HW spec. We must check the specified rounding mode - // before using it. Also, v_dot2_bf16_bf16 doesn't respect signed zeros so we must check NSZ as well. - const auto fp16RoundMode = - getPipelineState()->getShaderModes()->getCommonShaderMode(m_shaderStage.value()).fp16RoundMode; - const auto vectorTy = dyn_cast(vector1->getType()); - if (vectorTy && (fp16RoundMode == FpRoundMode::DontCare || fp16RoundMode == FpRoundMode::Even) && - getFastMathFlags().noSignedZeros()) { - int compCount = vectorTy->getNumElements(); - Value *result = nullptr; - - if (compCount % 2 == 0) { - result = ConstantFP::get(getBFloatTy(), 0.0); - } else { - // If the component count is odd, prefer feeding the last product (odd one out) as initial value. 
- Value *lhs = CreateExtractElement(vector1, compCount - 1); - Value *rhs = CreateExtractElement(vector2, compCount - 1); - result = CreateFMul(lhs, rhs); - } - - for (int i = 0; i + 1 < compCount; i += 2) { - Value *lhs = CreateShuffleVector(vector1, {i, i + 1}); - Value *rhs = CreateShuffleVector(vector2, {i, i + 1}); - result = CreateIntrinsic(getBFloatTy(), Intrinsic::amdgcn_fdot2_bf16_bf16, {lhs, rhs, result}); - } - return result; - } - } Value *product = CreateFMul(vector1, vector2); if (!isa(product->getType())) @@ -788,7 +759,7 @@ void implementScalarization(Value *nonUniformInstOperand, Value *nonUniformIndex for (Instruction *origInst : instrsToClone) { auto *newInst = origInst->clone(); - newInst->insertBefore(prevInst); + newInst->insertBefore(prevInst->getIterator()); origClonedValuesMap[origInst] = newInst; prevInst = newInst; // Update the operand of the nonUniformInst (for which the waterfall is created) with the new load that we @@ -805,7 +776,7 @@ void implementScalarization(Value *nonUniformInstOperand, Value *nonUniformIndex // Clone the first non-uniform index. auto *origInst = cast(nonUniformIndex); auto *newInst = origInst->clone(); - newInst->insertBefore(prevInst); + newInst->insertBefore(prevInst->getIterator()); origClonedValuesMap[origInst] = newInst; // Update the operands of the cloned instructions. diff --git a/lgc/builder/BuilderRecorder.cpp b/lgc/builder/BuilderRecorder.cpp index 250b74e0e4..e376aea4f2 100644 --- a/lgc/builder/BuilderRecorder.cpp +++ b/lgc/builder/BuilderRecorder.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -2081,6 +2081,8 @@ Instruction *Builder::record(BuilderOpcode opcode, Type *resultTy, ArrayRefsetDoesNotAccessMemory(); break; case BuilderOpcode::ImageSample: + func->addFnAttr(Attribute::Convergent); + LLVM_FALLTHROUGH; case BuilderOpcode::ImageSampleConvert: // Function read and write memory if return is void. if (!resultTy || resultTy->isVoidTy()) diff --git a/lgc/builder/BuilderRecorder.h b/lgc/builder/BuilderRecorder.h index 07940891b6..800a8bade6 100644 --- a/lgc/builder/BuilderRecorder.h +++ b/lgc/builder/BuilderRecorder.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to diff --git a/lgc/builder/BuilderReplayer.cpp b/lgc/builder/BuilderReplayer.cpp index 2c3348c872..fc71383b44 100644 --- a/lgc/builder/BuilderReplayer.cpp +++ b/lgc/builder/BuilderReplayer.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -777,15 +777,18 @@ Value *BuilderReplayer::processCall(unsigned opcode, CallInst *call) { } case BuilderOpcode::SubgroupClusteredReduction: { Builder::GroupArithOp groupArithOp = static_cast(cast(args[0])->getZExtValue()); - return m_builder->CreateSubgroupClusteredReduction(groupArithOp, args[1], args[2]); + unsigned clusterSize = cast(args[2])->getZExtValue(); + return m_builder->CreateSubgroupClusteredReduction(groupArithOp, args[1], clusterSize); } case BuilderOpcode::SubgroupClusteredInclusive: { Builder::GroupArithOp groupArithOp = static_cast(cast(args[0])->getZExtValue()); - return m_builder->CreateSubgroupClusteredInclusive(groupArithOp, args[1], args[2]); + unsigned clusterSize = cast(args[2])->getZExtValue(); + return m_builder->CreateSubgroupClusteredInclusive(groupArithOp, args[1], clusterSize); } case BuilderOpcode::SubgroupClusteredExclusive: { Builder::GroupArithOp groupArithOp = static_cast(cast(args[0])->getZExtValue()); - return m_builder->CreateSubgroupClusteredExclusive(groupArithOp, args[1], args[2]); + unsigned clusterSize = cast(args[2])->getZExtValue(); + return m_builder->CreateSubgroupClusteredExclusive(groupArithOp, args[1], clusterSize); } case BuilderOpcode::SubgroupClusteredMultiExclusive: { Builder::GroupArithOp groupArithOp = static_cast(cast(args[0])->getZExtValue()); diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index 48048837a7..3940edd21f 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -649,11 +649,12 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags imageInst = CreateIntrinsic(intrinsicDataTy, intrinsicId, args, nullptr, instName); } else { // Texel buffer descriptor. Use the buffer instruction. 
+ Value *sOffset = getInt32(0); imageDescArgIndex = args.size(); args.push_back(imageDesc); args.push_back(coords[0]); args.push_back(getInt32(0)); - args.push_back(getInt32(0)); + args.push_back(sOffset); args.push_back(getInt32(0)); imageInst = CreateIntrinsic(intrinsicDataTy, Intrinsic::amdgcn_struct_buffer_load_format, args, nullptr, instName); } @@ -874,12 +875,13 @@ Value *BuilderImpl::CreateImageStore(Value *texel, unsigned dim, unsigned flags, texel = CreateInsertElement(Constant::getNullValue(FixedVectorType::get(texelTy, 4)), texel, uint64_t(0)); // Do the buffer store. + Value *sOffset = getInt32(0); args.push_back(texel); imageDescArgIndex = args.size(); args.push_back(imageDesc); args.push_back(coords[0]); args.push_back(getInt32(0)); - args.push_back(getInt32(0)); + args.push_back(sOffset); args.push_back(getInt32(0)); imageStore = CreateIntrinsic(getVoidTy(), Intrinsic::amdgcn_struct_buffer_store_format, args, nullptr, instName); } @@ -2186,8 +2188,9 @@ Value *BuilderImpl::transformImageDesc(Value *imageDesc, bool mustLoad, bool isT if (isa(imageDesc->getType())) return imageDesc; + unsigned texBufSrdSize = 4; // Explicitly load the descriptor from the descriptor pointer - Type *descType = FixedVectorType::get(getInt32Ty(), isTexelBuffer ? 4 : 8); + Type *descType = FixedVectorType::get(getInt32Ty(), isTexelBuffer ? texBufSrdSize : 8); // Use smaller alignment for better load speculation. Value *desc = CreateAlignedLoad(descType, imageDesc, Align(4)); cast(desc)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(getContext(), {})); diff --git a/lgc/builder/MatrixBuilder.cpp b/lgc/builder/MatrixBuilder.cpp index 4d63de7ff3..df47fb2fe0 100644 --- a/lgc/builder/MatrixBuilder.cpp +++ b/lgc/builder/MatrixBuilder.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -28,7 +28,6 @@ * @brief LLPC source file: implementation of matrix Builder methods *********************************************************************************************************************** */ -#include "lgc/LgcDialect.h" #include "lgc/builder/BuilderImpl.h" #define DEBUG_TYPE "lgc-builder-impl-matrix" @@ -344,96 +343,3 @@ Value *BuilderImpl::CreateMatrixInverse(Value *const matrix, const Twine &instNa result->setName(instName); return result; } - -// ===================================================================================================================== -// Convert the element type enum into the corresponding LLVM type. -// -// @param elemType : The element type enum value -// @returns the corresponding LLVM type -Type *BuilderCommon::transCooperativeMatrixElementType(CooperativeMatrixElementType elemType) { - switch (elemType) { - case CooperativeMatrixElementType::Float16: - case CooperativeMatrixElementType::Float16Packed: - return getHalfTy(); - case CooperativeMatrixElementType::Float32: - return getFloatTy(); - case CooperativeMatrixElementType::Int16: - case CooperativeMatrixElementType::BFloat16: - return getInt16Ty(); - case CooperativeMatrixElementType::Int32: - return getInt32Ty(); - case CooperativeMatrixElementType::Int8: - case CooperativeMatrixElementType::Float8: - case CooperativeMatrixElementType::BFloat8: - return getInt8Ty(); - case CooperativeMatrixElementType::Int4: - return getIntNTy(4); - default: - llvm_unreachable("The element type is not supported."); - } -} - -// ===================================================================================================================== -// Get the LGC type of a cooperative matrix with the given element type and layout. 
-// -// @param elemType : the matrix element type -// @param layout : the matrix layout -// @param kSize : the matrix K size -Type *BuilderCommon::getCooperativeMatrixTy(CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned kSize) { - // Note: the layout currently has no influence on the type. In the long run, we should switch to genuinely opaque - // types at the LGC level, and parameterize the type using both the element type and the layout. - - Type *wordTy = transCooperativeMatrixElementType(elemType)->isIntOrIntVectorTy() ? getInt32Ty() : getFloatTy(); - unsigned nDwords = 0; - (void)(nDwords); - switch (layout) { - case CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout: - case CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout: - case CooperativeMatrixLayout::AccumulatorMatrixLayout: - return FixedVectorType::get(wordTy, 8); - case CooperativeMatrixLayout::FactorMatrixLayout: - if (elemType == CooperativeMatrixElementType::Int4) - return FixedVectorType::get(wordTy, 2); - if (elemType == CooperativeMatrixElementType::Int8) - return FixedVectorType::get(wordTy, 4); - return FixedVectorType::get(wordTy, 8); - default: - llvm_unreachable("Type is not supported!"); - } -} - -// ===================================================================================================================== -// Get the bit width of the cooperativeMatrix element type -// -// @param elemType : the matrix element type -unsigned BuilderCommon::getBitWidthOfCooperativeMatrixElement(CooperativeMatrixElementType elemType) { - switch (elemType) { - case lgc::CooperativeMatrixElementType::Float16: - case lgc::CooperativeMatrixElementType::Float16Packed: - case lgc::CooperativeMatrixElementType::BFloat16: - case lgc::CooperativeMatrixElementType::Int16: - return 16; - case lgc::CooperativeMatrixElementType::Float32: - case lgc::CooperativeMatrixElementType::Int32: - return 32; - case lgc::CooperativeMatrixElementType::Int8: - case 
lgc::CooperativeMatrixElementType::Float8: - case lgc::CooperativeMatrixElementType::BFloat8: - return 8; - case lgc::CooperativeMatrixElementType::Int4: - return 4; - default: - llvm_unreachable("Type is not supported!"); - } -} - -// ===================================================================================================================== -// Whether the type of a cooperative matrix is specified bit width. -// -// @param elemType : the matrix element type -// @param bitWidth : the specified bit width -bool BuilderCommon::isTypeNCooperativeMatrix(CooperativeMatrixElementType elemType, unsigned bitWidth) { - unsigned width = getBitWidthOfCooperativeMatrixElement(elemType); - return width == bitWidth; -} diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 05d34b4b48..6b3e4fde5f 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -398,21 +398,20 @@ Value *BuilderImpl::createSubgroupShuffle(const SubgroupHelperLaneState &state, // Start the WWM section by setting the inactive lanes. 
Value *const poisonValue = PoisonValue::get(value->getType()); Value *const poisonIndex = PoisonValue::get(index->getType()); - Value *wwmValue = BuilderBase::get(*this).CreateSetInactive(value, poisonValue); - Value *wwmIndex = nullptr; - BuilderBase::MapToSimpleTypeFunc bPermFunc = nullptr; - { - Value *const scaledIndex = CreateMul(index, getInt32(4)); - wwmIndex = BuilderBase::get(*this).CreateSetInactive(scaledIndex, poisonIndex); - bPermFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { - return builder.CreateIntrinsic(Intrinsic::amdgcn_ds_bpermute, {}, {passthroughArgs[0], mappedArgs[0]}); - }; - } + auto bPermFunc = [](BuilderBase &builder, ArrayRef mappedArgs, + ArrayRef passthroughArgs) -> Value * { + return builder.CreateIntrinsic(Intrinsic::amdgcn_ds_bpermute, {}, {passthroughArgs[0], mappedArgs[0]}); + }; auto permuteFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { return builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_permlane64, {mappedArgs[0]}); }; + + Value *wwmValue = BuilderBase::get(*this).CreateSetInactive(value, poisonValue); + Value *const scaledIndex = CreateMul(index, getInt32(4)); + Value *wwmIndex = BuilderBase::get(*this).CreateSetInactive(scaledIndex, poisonIndex); + auto swapped = CreateMapToSimpleType(permuteFunc, wwmValue, {}); auto bPermSameHalf = CreateMapToSimpleType(bPermFunc, wwmValue, wwmIndex); @@ -566,12 +565,12 @@ Value *BuilderImpl::CreateSubgroupShuffleDown(Value *const value, Value *const d // // @param groupArithOp : The group arithmetic operation. // @param value : An LLVM value. -// @param inClusterSize : The expected cluster size. +// @param inClusterSize : The expected cluster size or full wavesize if 0. // @param instName : Name to give final instruction. 
Value *BuilderImpl::CreateSubgroupClusteredReduction(GroupArithOp groupArithOp, Value *const value, - Value *const inClusterSize, const Twine &instName) { - assert(isa(inClusterSize)); - unsigned clusterSize = cast(inClusterSize)->getZExtValue(); + unsigned inClusterSize, const Twine &instName) { + unsigned clusterSize = (inClusterSize == 0) ? getShaderWaveSize() : inClusterSize; + assert(isPowerOf2_32(clusterSize)); const unsigned waveSize = getShaderWaveSize(); clusterSize = std::min(clusterSize, waveSize); @@ -649,12 +648,11 @@ Value *BuilderImpl::CreateSubgroupClusteredReduction(GroupArithOp groupArithOp, // // @param groupArithOp : The group arithmetic operation. // @param value : An LLVM value. -// @param inClusterSize : The expected cluster size. +// @param inClusterSize : The expected cluster size or full wavesize if 0. // @param instName : Name to give final instruction. Value *BuilderImpl::CreateSubgroupClusteredInclusive(GroupArithOp groupArithOp, Value *const value, - Value *const inClusterSize, const Twine &instName) { - assert(isa(inClusterSize)); - unsigned clusterSize = cast(inClusterSize)->getZExtValue(); + unsigned inClusterSize, const Twine &instName) { + unsigned clusterSize = (inClusterSize == 0) ? getShaderWaveSize() : inClusterSize; assert(isPowerOf2_32(clusterSize)); const unsigned waveSize = getShaderWaveSize(); clusterSize = std::min(clusterSize, waveSize); @@ -723,12 +721,11 @@ Value *BuilderImpl::CreateSubgroupClusteredInclusive(GroupArithOp groupArithOp, // // @param groupArithOp : The group arithmetic operation. // @param value : An LLVM value. -// @param inClusterSize : The expected cluster size. +// @param inClusterSize : The expected cluster size or full wavesize if 0. // @param instName : Name to give final instruction. 
Value *BuilderImpl::CreateSubgroupClusteredExclusive(GroupArithOp groupArithOp, Value *const value, - Value *const inClusterSize, const Twine &instName) { - assert(isa(inClusterSize)); - unsigned clusterSize = cast(inClusterSize)->getZExtValue(); + unsigned inClusterSize, const Twine &instName) { + unsigned clusterSize = (inClusterSize == 0) ? getShaderWaveSize() : inClusterSize; assert(isPowerOf2_32(clusterSize)); const unsigned waveSize = getShaderWaveSize(); clusterSize = std::min(clusterSize, waveSize); diff --git a/lgc/disassembler/CMakeLists.txt b/lgc/disassembler/CMakeLists.txt index aec33e8f6b..49e293c2ae 100644 --- a/lgc/disassembler/CMakeLists.txt +++ b/lgc/disassembler/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -43,7 +43,7 @@ LINK_COMPONENTS ### Compiler Options ################################################################################################### include(../../cmake/CompilerFlags.cmake) -set_compiler_options(LLVMlgcdis ${LLPC_ENABLE_WERROR}) +set_compiler_options(LLVMlgcdis) ### Defines/Includes/Sources ########################################################################################### target_include_directories(LLVMlgcdis diff --git a/lgc/disassembler/Disassembler.cpp b/lgc/disassembler/Disassembler.cpp index 31e4fdc518..39a9328038 100644 --- a/lgc/disassembler/Disassembler.cpp +++ b/lgc/disassembler/Disassembler.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2021-2024 Advanced Micro Devices, Inc. 
All Rights Reserved. + * Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -111,20 +111,28 @@ class ObjDisassembler { StringSaver m_strings{m_stringsAlloc}; public: - static void disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { + static Error disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { ObjDisassembler objDis(data, ostream); - objDis.run(); + return objDis.run(); + } + + // Disassemble a single symbol within an object (typically ELF) into ostream. + // Returns Error::success() if no errors occurred, otherwise returns the Error object. + static Error disassembleSingleSymbol(MemoryBufferRef data, raw_ostream &ostream, StringRef symbolName) { + ObjDisassembler objDis(data, ostream); + return objDis.run(symbolName); } private: ObjDisassembler(MemoryBufferRef data, raw_ostream &ostream) : m_data(data), m_ostream(ostream) {} - void run(); - void processSection(ELFSectionRef sectionRef); - void gatherSectionSymbols(ELFSectionRef sectionRef, SymbolPool &symbols); + Error run(StringRef symbolName = StringRef()); + Error processSection(ELFSectionRef sectionRef, std::optional symbolRef = std::nullopt); + Error gatherSectionSymbols(ELFSectionRef sectionRef, bool skipDirectiveEmission, SymbolPool &symbols); void gatherRelocs(ELFSectionRef sectionRef, std::vector &relocs); - void tryDisassembleSection(ELFSectionRef sectionRef, unsigned sectType, unsigned sectFlags, bool outputting, - SymbolPool &symbols, ArrayRef relocs); + Error tryDisassembleSection(ELFSectionRef sectionRef, unsigned sectType, unsigned sectFlags, bool outputting, + SymbolPool &symbols, ArrayRef relocs, + std::optional symbolRef = std::nullopt); bool disasmInstSeq(SmallVectorImpl &seq, uint64_t offset, bool outputting, StringRef contents, SymbolPool &symbols); bool disasmLongJump(SmallVectorImpl 
&seq, const InstOrDirective &inst, bool outputting, @@ -134,8 +142,8 @@ class ObjDisassembler { InstOrDirective disasmInst(uint64_t offset, StringRef contents); void addBinaryEncodingComment(raw_ostream &stream, unsigned instAlignment, ArrayRef instBytes); void outputInst(InstOrDirective inst, unsigned instAlignment); - void outputData(bool outputting, uint64_t offset, StringRef data, ArrayRef &relocs); - void outputRelocs(bool outputting, uint64_t offset, uint64_t size, ArrayRef &relocs); + Error outputData(bool outputting, uint64_t offset, StringRef data, ArrayRef &relocs); + Error outputRelocs(bool outputting, uint64_t offset, uint64_t size, ArrayRef &relocs); size_t decodeNote(StringRef data); MCSymbol *getOrCreateSymbol(SymbolPool &symbols, uint64_t offset, Twine name = {}, unsigned type = ELF::STT_NOTYPE); @@ -147,7 +155,7 @@ class ObjDisassembler { // ===================================================================================================================== // Disassemble an archive of ELFs. We put the disassembled code into a new archive with same member // names with ".S" suffix. 
-static void disassembleArchive(MemoryBufferRef data, raw_ostream &ostream) { +static Error disassembleArchive(MemoryBufferRef data, raw_ostream &ostream) { Error err = Error::success(); SmallVector disassembledMembers; SmallVector> strBuffers; @@ -169,7 +177,9 @@ static void disassembleArchive(MemoryBufferRef data, raw_ostream &ostream) { nameBuffer += ".S"; raw_svector_ostream disasmStream(disBuffer); disasmStream << "// Member " << *name << ":\n"; - ObjDisassembler::disassembleObject(*contents, disasmStream); + err = ObjDisassembler::disassembleObject(*contents, disasmStream); + if (err) + break; disassembledMembers.emplace_back(MemoryBufferRef(disBuffer, nameBuffer)); } } @@ -184,16 +194,16 @@ static void disassembleArchive(MemoryBufferRef data, raw_ostream &ostream) { ostream << (*newArchive)->getBuffer(); } - if (err) - report_fatal_error(std::move(err)); + return err; } // ===================================================================================================================== -// Disassemble an ELF object into ostream. Does report_fatal_error on error. +// Disassemble an ELF object into ostream. // // @param data : The object file contents // @param ostream : The stream to disassemble into -void lgc::disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { +// @returns : Error::success() if no errors occurred, otherwise returns the Error object +Error lgc::disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { // Initialize targets and assembly printers/parsers. InitializeAllTargetInfos(); InitializeAllTargetMCs(); @@ -201,30 +211,48 @@ void lgc::disassembleObject(MemoryBufferRef data, raw_ostream &ostream) { if (data.getBuffer().starts_with("!\n")) { // Disassemble archive of ELFs. - disassembleArchive(data, ostream); - return; + return disassembleArchive(data, ostream); } // Attempt to disassemble ELF. 
- ObjDisassembler::disassembleObject(data, ostream); + return ObjDisassembler::disassembleObject(data, ostream); +} + +// Disassemble a single symbol within an object (typically ELF) into ostream. +// +// @param data : The object file contents +// @param ostream : The stream to disassemble into +// @param symbolName : symbol to disassemble +// @returns : Error::success() if no errors occurred, otherwise returns the Error object +Error lgc::disassembleSingleSymbol(MemoryBufferRef data, raw_ostream &ostream, StringRef symbolName) { + // Initialize targets and assembly printers/parsers. + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllDisassemblers(); + + // Attempt to disassemble symbol. + return ObjDisassembler::disassembleSingleSymbol(data, ostream, symbolName); } // ===================================================================================================================== -// Run the object disassembler to disassemble the object. Does report_fatal_error on error. -void ObjDisassembler::run() { +// Run the object disassembler to disassemble the object or symbol. +// +// @param symbolName (optional) : symbol to disassemble +// @returns : Error::success() if no errors occurred, otherwise returns the Error object +Error ObjDisassembler::run(StringRef symbolName) { // Decode the object file. Expected> expectedObjFile = ObjectFile::createELFObjectFile(m_data); - if (!expectedObjFile) - report_fatal_error(m_data.getBufferIdentifier() + ": Cannot decode ELF object file"); + if (Error E = expectedObjFile.takeError()) + return createStringError(Twine("Cannot decode ELF object file: ") + toString(std::move(E))); if (!isa(&*expectedObjFile.get())) - report_fatal_error(m_data.getBufferIdentifier() + ": Is not ELF object file"); + return createStringError("Is not ELF object file"); m_objFile.reset(cast(expectedObjFile.get().release())); // Figure out the target triple from the object file, and get features. 
Triple triple = m_objFile->makeTriple(); Expected expectedFeatures = m_objFile->getFeatures(); if (!expectedFeatures) - report_fatal_error(expectedFeatures.takeError()); + return expectedFeatures.takeError(); SubtargetFeatures features = *expectedFeatures; // Get the target specific parser. @@ -232,45 +260,46 @@ void ObjDisassembler::run() { m_tripleName = triple.getTriple(); m_target = TargetRegistry::lookupTarget(m_tripleName, error); if (!m_target) - report_fatal_error(m_objFile->getFileName() + ": '" + m_tripleName + "': " + error); + return createStringError("'" + m_tripleName + "': " + error); // Get the CPU name. std::optional mcpu = m_objFile->tryGetCPUName(); if (!mcpu) - report_fatal_error(m_objFile->getFileName() + ": Cannot get CPU name"); + return createStringError("Cannot get CPU name"); - // Output the required llvm-mc command as a comment. - m_ostream << "// llvm-mc -triple=" << m_tripleName << " -mcpu=" << mcpu << "\n"; + // Output the required llvm-mc command as a comment unless we're only disassembling a single symbol. + if (symbolName.empty()) + m_ostream << "// llvm-mc -triple=" << m_tripleName << " -mcpu=" << mcpu << "\n"; // Set up other objects required for disassembly. 
std::unique_ptr regInfo(m_target->createMCRegInfo(m_tripleName)); if (!regInfo) - report_fatal_error(m_data.getBufferIdentifier() + ": No register info for target"); + return createStringError("No register info for target"); MCTargetOptions targetOptions{}; targetOptions.AsmVerbose = true; std::unique_ptr asmInfo(m_target->createMCAsmInfo(*regInfo, m_tripleName, targetOptions)); if (!asmInfo) - report_fatal_error(m_data.getBufferIdentifier() + ": No assembly info for target"); + return createStringError("No assembly info for target"); m_subtargetInfo.reset(m_target->createMCSubtargetInfo(m_tripleName, *mcpu, features.getString())); if (!m_subtargetInfo) - report_fatal_error(m_data.getBufferIdentifier() + ": No subtarget info for target"); + return createStringError("No subtarget info for target"); std::unique_ptr instrInfo(m_target->createMCInstrInfo()); if (!instrInfo) - report_fatal_error(m_data.getBufferIdentifier() + ": No instruction info for target"); + return createStringError("No instruction info for target"); MCContext context(triple, asmInfo.get(), regInfo.get(), m_subtargetInfo.get(), nullptr, &targetOptions); std::unique_ptr objFileInfo(m_target->createMCObjectFileInfo(context, /*PIC=*/false)); if (!objFileInfo) - report_fatal_error("No MC object file info"); + return createStringError("No MC object file info"); context.setObjectFileInfo(objFileInfo.get()); m_context = &context; m_instDisassembler.reset(m_target->createMCDisassembler(*m_subtargetInfo, *m_context)); if (!m_instDisassembler) - report_fatal_error(m_data.getBufferIdentifier() + ": No disassembler for target"); + return createStringError("No disassembler for target"); m_instPrinter = m_target->createMCInstPrinter(triple, asmInfo->getAssemblerDialect(), *asmInfo, *instrInfo, *regInfo); if (!m_instPrinter) - report_fatal_error(m_data.getBufferIdentifier() + ": No instruction printer for target"); + return createStringError("No instruction printer for target"); auto fostream = 
std::make_unique(m_ostream); #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 505779 @@ -279,32 +308,67 @@ void ObjDisassembler::run() { #else m_streamer.reset(m_target->createAsmStreamer(*m_context, std::move(fostream), m_instPrinter, nullptr, nullptr)); #endif - // Process each section. - for (ELFSectionRef sectionRef : m_objFile->sections()) - processSection(sectionRef); + + if (!symbolName.empty()) { + for (ELFSymbolRef symbolRef : m_objFile->symbols()) { + Expected expectedCurrSymbolName = symbolRef.getName(); + if (!expectedCurrSymbolName) + return expectedCurrSymbolName.takeError(); + + if (*expectedCurrSymbolName == symbolName) { + Expected expectedSection = symbolRef.getSection(); + if (!expectedSection) + return expectedSection.takeError(); + + return processSection(*expectedSection.get(), symbolRef); + } + } + + return createStringError(symbolName + ": Symbol not found!"); + } else { + for (ELFSectionRef sectionRef : m_objFile->sections()) { + Error err = processSection(sectionRef); + if (err) + return err; + } + } + + return Error::success(); } // ===================================================================================================================== // Disassemble one section. // // @param sectionRef : The section to disassemble -void ObjDisassembler::processSection(ELFSectionRef sectionRef) { +// @param symbolRef (optional) : symbol to disassemble +Error ObjDisassembler::processSection(ELFSectionRef sectionRef, std::optional symbolRef) { // Omit certain ELF sections. unsigned sectType = sectionRef.getType(); if (sectType == ELF::SHT_NULL || sectType == ELF::SHT_STRTAB || sectType == ELF::SHT_SYMTAB || sectType == ELF::SHT_REL || sectType == ELF::SHT_RELA) - return; + return Error::success(); // Switch the streamer to the section. 
- m_streamer->addBlankLine(); unsigned sectFlags = sectionRef.getFlags(); - MCSection *sect = m_context->getELFSection(cantFail(sectionRef.getName()), sectType, sectFlags); - m_streamer->switchSection(sect); + Expected expectedSectionName = sectionRef.getName(); + if (!expectedSectionName) + return expectedSectionName.takeError(); + MCSection *sect = m_context->getELFSection(*expectedSectionName, sectType, sectFlags); + + if (symbolRef) { + // Don't output the section when disassembling a single symbol. + m_streamer->switchSectionNoPrint(sect); + } else { + m_streamer->addBlankLine(); + m_streamer->switchSection(sect); + } // Create all symbols in this section. Also emit directives for symbol type and size, - // adding a synthesized label for the end of the symbol. + // adding a synthesized label for the end of the symbol unless we're only disassembling a single symbol. SymbolPool symbols; - gatherSectionSymbols(sectionRef, symbols); + Error err = gatherSectionSymbols(sectionRef, symbolRef.has_value(), symbols); + if (err) + return err; // Collect and sort the relocs for the section. std::vector relocs; @@ -333,7 +397,10 @@ void ObjDisassembler::processSection(ELFSectionRef sectionRef) { // Disassemble the section contents. size_t prevNumSymbols = symbols.symbols.size(); stable_sort(symbols.symbols); // Stable sort as there may be duplicate addresses. - tryDisassembleSection(sectionRef, sectType, sectFlags, outputting, symbols, relocs); + err = tryDisassembleSection(sectionRef, sectType, sectFlags, outputting, symbols, relocs, symbolRef); + if (err) + return err; + if (outputting) break; // Done final outputting pass. @@ -346,43 +413,67 @@ void ObjDisassembler::processSection(ELFSectionRef sectionRef) { // the next pass. 
outputting = (symbols.symbols.size() == prevNumSymbols); } + + return Error::success(); } // ===================================================================================================================== -// Create all symbols in the given section. Also emit directives for symbol type and size. +// Create all symbols in the given section. +// If skipDirectiveEmission is false, also emit directives for symbol type and size. // The size is an expression endSym-sym where endSym is a synthesized label at the end of the function. // // @param sectionRef : The section being disassembled +// @param skipDirectiveEmission : True to skip synthesizing endSym label and outputting .type and .size directives // @param [out] symbols : Symbols to populate -void ObjDisassembler::gatherSectionSymbols(ELFSectionRef sectionRef, SymbolPool &symbols) { +Error ObjDisassembler::gatherSectionSymbols(ELFSectionRef sectionRef, bool skipDirectiveEmission, SymbolPool &symbols) { for (ELFSymbolRef symbolRef : m_objFile->symbols()) { - if (cantFail(symbolRef.getSection()) != sectionRef) + Expected expectedSection = symbolRef.getSection(); + if (!expectedSection) + return expectedSection.takeError(); + + if (*expectedSection != sectionRef) continue; - uint64_t offset = cantFail(symbolRef.getValue()); - StringRef name = cantFail(symbolRef.getName()); + Expected expectedOffset = symbolRef.getValue(); + if (!expectedOffset) + return expectedOffset.takeError(); + uint64_t offset = *expectedOffset; + + Expected expectedSymbolName = symbolRef.getName(); + if (!expectedSymbolName) + return expectedSymbolName.takeError(); + StringRef name = *expectedSymbolName; + + Expected expectedSectionContents = sectionRef.getContents(); + if (!expectedSectionContents) + return expectedSectionContents.takeError(); + unsigned type = symbolRef.getELFType(); MCSymbol *sym = getOrCreateSymbol(symbols, offset, name, type); - switch (type) { - case ELF::STT_FUNC: - m_streamer->emitSymbolAttribute(sym, 
MCSA_ELF_TypeFunction); - break; - case ELF::STT_OBJECT: - m_streamer->emitSymbolAttribute(sym, MCSA_ELF_TypeObject); - break; - } + if (!skipDirectiveEmission) { + switch (type) { + case ELF::STT_FUNC: + m_streamer->emitSymbolAttribute(sym, MCSA_ELF_TypeFunction); + break; + case ELF::STT_OBJECT: + m_streamer->emitSymbolAttribute(sym, MCSA_ELF_TypeObject); + break; + } - if (uint64_t size = symbolRef.getSize()) { - uint64_t endOffset = offset + size; - if (endOffset <= cantFail(sectionRef.getContents()).size()) { - MCSymbol *endSym = getOrCreateSymbol(symbols, endOffset, Twine(name) + "_symend"); - const MCExpr *sizeExpr = MCBinaryExpr::createSub(MCSymbolRefExpr::create(endSym, *m_context), - MCSymbolRefExpr::create(sym, *m_context), *m_context); - m_streamer->emitELFSize(sym, sizeExpr); + if (uint64_t size = symbolRef.getSize()) { + uint64_t endOffset = offset + size; + if (endOffset <= expectedSectionContents->size()) { + MCSymbol *endSym = getOrCreateSymbol(symbols, endOffset, Twine(name) + "_symend"); + const MCExpr *sizeExpr = MCBinaryExpr::createSub(MCSymbolRefExpr::create(endSym, *m_context), + MCSymbolRefExpr::create(sym, *m_context), *m_context); + m_streamer->emitELFSize(sym, sizeExpr); + } } } } + + return Error::success(); } // ===================================================================================================================== @@ -412,9 +503,11 @@ void ObjDisassembler::gatherRelocs(ELFSectionRef sectionRef, std::vector relocs) { +// @param symbolRef (optional) : symbol to disassemble +Error ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned sectType, unsigned sectFlags, + bool outputting, SymbolPool &symbols, + ArrayRef relocs, + std::optional symbolRef) { bool isCode = sectFlags & ELF::SHF_EXECINSTR; bool isNote = sectType == ELF::SHT_NOTE; @@ -423,9 +516,41 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s instAlignment = m_context->getAsmInfo()->getMinInstAlignment(); // Get 
the section contents, and disassemble until nothing left. - StringRef contents = cantFail(sectionRef.getContents()); + Expected expectedSectionContents = sectionRef.getContents(); + if (!expectedSectionContents) + return expectedSectionContents.takeError(); + StringRef contents = *expectedSectionContents; size_t offset = 0, lastOffset = 0; size_t nextSymbol = 0; + size_t endAddr = contents.size(); + + if (symbolRef) { + bool startingSymbolFound = false; + size_t startingSymbol = 0; + size_t symbolSize = symbolRef->getSize(); + Expected expectedSymbolName = symbolRef->getName(); + if (!expectedSymbolName) + return expectedSymbolName.takeError(); + StringRef symbolName = *expectedSymbolName; + + for (size_t i = 0; i < symbols.symbols.size(); i++) { + SymbolInfoTy currSymbol = symbols.symbols[i]; + + if (currSymbol.Name == symbolName) { + startingSymbol = i; + startingSymbolFound = true; + break; + } + } + + if (!startingSymbolFound) + return createStringError(symbolName + ": Symbol not found!"); + + offset = symbols.symbols[startingSymbol].Addr; + lastOffset = offset; + endAddr = offset + symbolSize; + nextSymbol = startingSymbol; + } // The current sequence of instructions, if any. // In the table-jump sequence, currently seen as the longest one, there @@ -434,7 +559,7 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s SmallVector instSeq; for (;;) { - size_t endOffset = contents.size(); + size_t endOffset = endAddr; if (nextSymbol != symbols.symbols.size() && symbols.symbols[nextSymbol].Addr < endOffset) endOffset = symbols.symbols[nextSymbol].Addr; @@ -442,7 +567,9 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s // We're about to emit a symbol or finish the section. // If there is any remaining non-disassemblable data, output it. 
if (lastOffset != offset) { - outputData(outputting, lastOffset, contents.slice(lastOffset, offset), relocs); + Error err = outputData(outputting, lastOffset, contents.slice(lastOffset, offset), relocs); + if (err) + return err; lastOffset = offset; } @@ -457,7 +584,7 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s continue; } - if (offset == contents.size()) + if (offset == endAddr) break; } @@ -489,11 +616,16 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s // Got a disassemblable instruction. // First output any non-disassemblable data up to this point. - if (lastOffset != offset) - outputData(outputting, lastOffset, contents.slice(lastOffset, offset), relocs); + if (lastOffset != offset) { + Error err = outputData(outputting, lastOffset, contents.slice(lastOffset, offset), relocs); + if (err) + return err; + } // Output reloc. - outputRelocs(outputting, offset, inst.bytes.size(), relocs); + Error err = outputRelocs(outputting, offset, inst.bytes.size(), relocs); + if (err) + return err; if (outputting) outputInst(inst, instAlignment); @@ -501,6 +633,8 @@ void ObjDisassembler::tryDisassembleSection(ELFSectionRef sectionRef, unsigned s offset += inst.bytes.size(); lastOffset = offset; } + + return Error::success(); } // ===================================================================================================================== @@ -762,8 +896,8 @@ void ObjDisassembler::outputInst(InstOrDirective inst, unsigned instAlignment) { // @param offset : Offset in section // @param data : Bytes of data // @param [in/out] relocs : ArrayRef of relocs, bumped on output past relocs that have been consumed -void ObjDisassembler::outputData(bool outputting, uint64_t offset, StringRef data, - ArrayRef &relocs) { +Error ObjDisassembler::outputData(bool outputting, uint64_t offset, StringRef data, + ArrayRef &relocs) { // Check whether the data is mostly ASCII, possibly with a terminating 0. 
size_t asciiCount = 0; for (char ch : data) { @@ -773,8 +907,11 @@ void ObjDisassembler::outputData(bool outputting, uint64_t offset, StringRef dat bool isAscii = asciiCount * 10 >= data.size() * 9; while (!data.empty()) { - if (!relocs.empty() && relocs[0].getOffset() == offset) - outputRelocs(outputting, offset, 1, relocs); + if (!relocs.empty() && relocs[0].getOffset() == offset) { + Error err = outputRelocs(outputting, offset, 1, relocs); + if (err) + return err; + } // Only go as far as the next reloc. size_t size = data.size(); @@ -816,6 +953,8 @@ void ObjDisassembler::outputData(bool outputting, uint64_t offset, StringRef dat offset += size; data = data.drop_front(size); } + + return Error::success(); } // ===================================================================================================================== @@ -826,8 +965,8 @@ void ObjDisassembler::outputData(bool outputting, uint64_t offset, StringRef dat // @param offset : Offset in section // @param size : Size of range to output relocs for // @param [in/out] relocs : ArrayRef of relocs, bumped on output past relocs that have been consumed -void ObjDisassembler::outputRelocs(bool outputting, uint64_t offset, uint64_t size, - ArrayRef &relocs) { +Error ObjDisassembler::outputRelocs(bool outputting, uint64_t offset, uint64_t size, + ArrayRef &relocs) { while (!relocs.empty() && relocs[0].getOffset() < offset + size) { if (outputting) { // Start with a '$' reference. 
@@ -843,12 +982,18 @@ void ObjDisassembler::outputRelocs(bool outputting, uint64_t offset, uint64_t si relocs[0].getTypeName(relocName); const MCExpr *tgtExpr = nullptr; auto symRef = relocs[0].getSymbol(); - if (symRef != m_objFile->symbol_end()) - tgtExpr = MCSymbolRefExpr::create(m_context->getOrCreateSymbol(cantFail(symRef->getName())), *m_context); + if (symRef != m_objFile->symbol_end()) { + Expected expectedSymbolName = symRef->getName(); + if (!expectedSymbolName) + return expectedSymbolName.takeError(); + tgtExpr = MCSymbolRefExpr::create(m_context->getOrCreateSymbol(*expectedSymbolName), *m_context); + } m_streamer->emitRelocDirective(*offsetExpr, relocName, tgtExpr, {}, *m_subtargetInfo); } relocs = relocs.drop_front(1); } + + return Error::success(); } // ===================================================================================================================== diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index abce37e4a4..6832f3a5ae 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -757,15 +757,15 @@ class BuilderImpl : public BuilderDefs { // Create a subgroup clustered reduction. llvm::Value *CreateSubgroupClusteredReduction(GroupArithOp groupArithOp, llvm::Value *const value, - llvm::Value *const clusterSize, const llvm::Twine &instName = ""); + unsigned clusterSize, const llvm::Twine &instName = ""); // Create a subgroup clustered inclusive scan. llvm::Value *CreateSubgroupClusteredInclusive(GroupArithOp groupArithOp, llvm::Value *const value, - llvm::Value *const clusterSize, const llvm::Twine &instName = ""); + unsigned clusterSize, const llvm::Twine &instName = ""); // Create a subgroup clustered exclusive scan. 
llvm::Value *CreateSubgroupClusteredExclusive(GroupArithOp groupArithOp, llvm::Value *const value, - llvm::Value *const clusterSize, const llvm::Twine &instName = ""); + unsigned clusterSize, const llvm::Twine &instName = ""); // Create a subgroup clustered multi exclusive scan. llvm::Value *CreateSubgroupClusteredMultiExclusive(GroupArithOp groupArithOp, llvm::Value *const value, diff --git a/lgc/include/lgc/lowering/AddBufferOperationMetadata.h b/lgc/include/lgc/lowering/AddBufferOperationMetadata.h index ee6747c6c2..47d4c25e4f 100644 --- a/lgc/include/lgc/lowering/AddBufferOperationMetadata.h +++ b/lgc/include/lgc/lowering/AddBufferOperationMetadata.h @@ -38,6 +38,8 @@ namespace lgc { +class LoadBufferDescOp; +class LoadStridedBufferDescOp; // ===================================================================================================================== // Represents the pass of LGC lowering operations for buffer operations class AddBufferOperationMetadata : public llvm::PassInfoMixin { @@ -53,9 +55,13 @@ class AddBufferOperationMetadata : public llvm::PassInfoMixin { diff --git a/lgc/include/lgc/lowering/LowerBufferOperations.h b/lgc/include/lgc/lowering/LowerBufferOperations.h index 73c531346c..f88a232756 100644 --- a/lgc/include/lgc/lowering/LowerBufferOperations.h +++ b/lgc/include/lgc/lowering/LowerBufferOperations.h @@ -80,7 +80,7 @@ class BufferOpLowering { }; public: - BufferOpLowering(CompilerUtils::TypeLowering &typeLowering, PipelineState &pipelineState, + BufferOpLowering(compilerutils::TypeLowering &typeLowering, PipelineState &pipelineState, llvm::UniformityInfo &uniformityInfo); static void registerVisitors(llvm_dialects::VisitorBuilder &builder); @@ -132,7 +132,7 @@ class BufferOpLowering { const llvm::function_ref callback); llvm::Value *createLoadDesc(llvm::Value *buffAddress, bool forceRawView, bool isCompact); - CompilerUtils::TypeLowering &m_typeLowering; + compilerutils::TypeLowering &m_typeLowering; BuilderImpl m_builder; 
PipelineState &m_pipelineState; diff --git a/lgc/include/lgc/lowering/LowerCooperativeMatrix.h b/lgc/include/lgc/lowering/LowerCooperativeMatrix.h index df9b15670f..02b935c9c8 100644 --- a/lgc/include/lgc/lowering/LowerCooperativeMatrix.h +++ b/lgc/include/lgc/lowering/LowerCooperativeMatrix.h @@ -31,6 +31,7 @@ #pragma once #include "SystemValues.h" #include "lgc/Builder.h" +#include "lgc/LgcXdlTypes.h" #include "lgc/lowering/LgcLowering.h" #include "lgc/state/PipelineShaders.h" #include "lgc/state/PipelineState.h" @@ -39,6 +40,7 @@ namespace lgc { +namespace xdl { class CooperativeRowAccLoadOp; class CooperativeRowAccStoreOp; class CooperativeRowAccFinalizeModeOp; @@ -61,6 +63,7 @@ class CooperativeMatrixTimesScalarOp; class CooperativeMatrixMulAddOp; class CooperativeMatrixPackOp; class CooperativeMatrixUnPackOp; +} // namespace xdl // ===================================================================================================================== // Pass to lower coopMatrix calls @@ -108,86 +111,88 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixinv8*coopMatrix_data as two 16bits elements packed. llvm::Value *convFlatVecToCoopMatrixVec(BuilderCommon &builder, llvm::Value *vecValue, - CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned kSize = 16); + xdl::CooperativeMatrixElementType elemType, + xdl::CooperativeMatrixLayout layout, unsigned kSize = 16); // Convert cooperativeMatrix vec data to vec data. 
llvm::Value *convCoopMatrixVecToFlatVec(BuilderCommon &builder, llvm::Value *matrixValue, - CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned kSize = 16); + xdl::CooperativeMatrixElementType elemType, + xdl::CooperativeMatrixLayout layout, unsigned kSize = 16); // Create cooperative matrix convert operation without reshape operation llvm::Value *cooperativeMatrixConvertInternal(llvm::CastInst::CastOps castOp, llvm::Value *source, - CooperativeMatrixElementType srcElemType, - CooperativeMatrixElementType dstElemType, const llvm::Twine &instName, - llvm::Instruction *insertPos); + xdl::CooperativeMatrixElementType srcElemType, + xdl::CooperativeMatrixElementType dstElemType, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative matrix binary operation - llvm::Value *cooperativeMatrixBinaryOp(CooperativeMatrixArithOp coopMatArithOp, llvm::Value *lhs, llvm::Value *rhs, - CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixBinaryOp(xdl::CooperativeMatrixArithOp coopMatArithOp, llvm::Value *lhs, + llvm::Value *rhs, xdl::CooperativeMatrixElementType elemType, + xdl::CooperativeMatrixLayout layout, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Create cooperative matrixTimeScalar operation - llvm::Value *coopMatrixTimesScalar(llvm::Value *matrix, llvm::Value *scalar, CooperativeMatrixElementType elemType, - CooperativeMatrixLayout layout, const llvm::Twine &instName, - llvm::Instruction *insertPos); + llvm::Value *coopMatrixTimesScalar(llvm::Value *matrix, llvm::Value *scalar, + xdl::CooperativeMatrixElementType elemType, xdl::CooperativeMatrixLayout layout, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Create cooperative matrix reshape operation for 16bit on gfx10 and gfx110 - llvm::Value *cooperativeMatrixReshape16BitElementGfx1011(llvm::Value *matrix, 
CooperativeMatrixElementType elemType, - CooperativeMatrixLayout srcLayout, - CooperativeMatrixLayout dstLayout, llvm::Value *threadId, - const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshape16BitElementGfx1011(llvm::Value *matrix, + xdl::CooperativeMatrixElementType elemType, + xdl::CooperativeMatrixLayout srcLayout, + xdl::CooperativeMatrixLayout dstLayout, + llvm::Value *threadId, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Create cooperative matrix reshape operation for 8bit on gfx10 and gfx11 llvm::Value *cooperativeMatrixReshapeBetween8bitAnd32bitElementGfx1011(llvm::Value *matrix, - CooperativeMatrixElementType srcElemType, - CooperativeMatrixLayout srcLayout, + xdl::CooperativeMatrixElementType srcElemType, + xdl::CooperativeMatrixLayout srcLayout, const llvm::Twine &instName, llvm::Instruction *insertPos); // Adjust the layout on accumulator for gfx10 - llvm::Value * - cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10(llvm::Value *source, CooperativeMatrixElementType srcElemType, - CooperativeMatrixElementType dstElemType, - CooperativeMatrixLayout layout, llvm::Value *isEvenGroup, - const llvm::Twine &instName, llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshapeBetween16bitAnd32bitOnAccGfx10( + llvm::Value *source, xdl::CooperativeMatrixElementType srcElemType, xdl::CooperativeMatrixElementType dstElemType, + xdl::CooperativeMatrixLayout layout, llvm::Value *isEvenGroup, const llvm::Twine &instName, + llvm::Instruction *insertPos); // Adjust the layout before reshape operation(eg:float16->float32) - llvm::Value *cooperativeMatrixReshapeBeforeConvert(llvm::Value *source, CooperativeMatrixElementType srcElemType, - CooperativeMatrixElementType dstElemType, - CooperativeMatrixLayout srcLayout, - CooperativeMatrixLayout dstLayout, const llvm::Twine &instName, - llvm::Instruction *insertPos); + llvm::Value *cooperativeMatrixReshapeBeforeConvert(llvm::Value *source, 
xdl::CooperativeMatrixElementType srcElemType, + xdl::CooperativeMatrixElementType dstElemType, + xdl::CooperativeMatrixLayout srcLayout, + xdl::CooperativeMatrixLayout dstLayout, + const llvm::Twine &instName, llvm::Instruction *insertPos); // Adjust the layout before reshape operation(eg:float32->float16) - llvm::Value *cooperativeMatrixReshapeAfterConvert(llvm::Value *source, CooperativeMatrixElementType srcElemType, - CooperativeMatrixElementType dstElemType, - CooperativeMatrixLayout srcLayout, - CooperativeMatrixLayout dstLayout, const llvm::Twine &instName, + llvm::Value *cooperativeMatrixReshapeAfterConvert(llvm::Value *source, xdl::CooperativeMatrixElementType srcElemType, + xdl::CooperativeMatrixElementType dstElemType, + xdl::CooperativeMatrixLayout srcLayout, + xdl::CooperativeMatrixLayout dstLayout, const llvm::Twine &instName, llvm::Instruction *insertPos); llvm::Value *transposeCooperativeMatrixRecursively(llvm::Value *matrix, unsigned vecStride, unsigned laneStride, @@ -196,8 +201,8 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixin { private: void visitDebugPrintf(DebugPrintfOp &op); + void writeToDebugPrintfBuffer(uint64_t header, llvm::Value *debugPrintfBuffer, + llvm::SmallVectorImpl &varData, BuilderBase &builder); void getDwordValues(llvm::Value *val, llvm::SmallVectorImpl &output, llvm::SmallBitVector &output64Bits, BuilderBase &builder); void setupElfsPrintfStrings(); diff --git a/lgc/include/lgc/lowering/LowerGpuRt.h b/lgc/include/lgc/lowering/LowerGpuRt.h index bd5762d57c..7a29025308 100644 --- a/lgc/include/lgc/lowering/LowerGpuRt.h +++ b/lgc/include/lgc/lowering/LowerGpuRt.h @@ -53,6 +53,7 @@ class GpurtLdsStackStoreOp; class GpurtGetBoxSortHeuristicModeOp; class GpurtGetRayQueryDispatchIdOp; class GpurtGetStaticFlagsOp; +class GpurtMakePcOp; class GpurtGetTriangleCompressionModeOp; class GpurtGetFlattenedGroupThreadIdOp; class GpurtFloatWithRoundModeOp; @@ -85,6 +86,7 @@ class LowerGpuRt : public 
llvm::PassInfoMixin { void visitGetBoxSortHeuristicMode(lgc::GpurtGetBoxSortHeuristicModeOp &inst); void visitGetRayQueryDispatchId(lgc::GpurtGetRayQueryDispatchIdOp &inst); void visitGetStaticFlags(lgc::GpurtGetStaticFlagsOp &inst); + void visitMakePc(lgc::GpurtMakePcOp &inst); void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst); void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst); void visitFloatWithRoundMode(lgc::GpurtFloatWithRoundModeOp &inst); diff --git a/lgc/include/lgc/lowering/MutateEntryPoint.h b/lgc/include/lgc/lowering/MutateEntryPoint.h index cfd13a0363..cbb24eddaf 100644 --- a/lgc/include/lgc/lowering/MutateEntryPoint.h +++ b/lgc/include/lgc/lowering/MutateEntryPoint.h @@ -31,6 +31,7 @@ #pragma once #include "compilerutils/TypeLowering.h" +#include "llpc/GpurtEnums.h" #include "llvmraytracing/CpsStackLowering.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" @@ -157,7 +158,7 @@ class MutateEntryPoint : public Patch, public llvm::PassInfoMixin argNames); llvm::Value *takeLevel(llvm::Value *level, llvm::IRBuilder<> &builder, llvm::Type *waveMaskTy, - llvm::ArrayRef priorities); + llvm::ArrayRef priorities); void lowerCpsJump(llvm::Function *parent, cps::JumpOp *jumpOp, llvm::BasicBlock *tailBlock, llvm::SmallVectorImpl &exitInfos); @@ -177,6 +178,8 @@ class MutateEntryPoint : public Patch, public llvm::PassInfoMixin m_funcOldEntryBlock; }; } // namespace lgc diff --git a/lgc/include/lgc/state/IntrinsDefs.h b/lgc/include/lgc/state/IntrinsDefs.h index 6fc11b5c81..d25e783bcc 100644 --- a/lgc/include/lgc/state/IntrinsDefs.h +++ b/lgc/include/lgc/state/IntrinsDefs.h @@ -428,6 +428,7 @@ enum BufFormat { BUF_FORMAT_32_32_32_32_UINT_GFX11 = 0x0000003D, BUF_FORMAT_32_32_32_32_SINT_GFX11 = 0x0000003E, BUF_FORMAT_32_32_32_32_FLOAT_GFX11 = 0x0000003F, + }; // Enumerates destination selection of data in memory buffer. 
diff --git a/lgc/include/lgc/util/AddressExtender.h b/lgc/include/lgc/util/AddressExtender.h index ab555b2265..7f7939ec89 100644 --- a/lgc/include/lgc/util/AddressExtender.h +++ b/lgc/include/lgc/util/AddressExtender.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -47,7 +47,14 @@ namespace lgc { class AddressExtender { public: // Constructor - AddressExtender(llvm::Function *func) : m_func(func) {} + // + // @param func: The function that we're inserting into + // @param insertInto: The basic block to insert into. This will usually be the + // entry block, but can be overridden in special cases (e.g. + // if the function is using the llvm.amdgcn.init.whole.wave + // intrinsic) + AddressExtender(llvm::Function *func, llvm::BasicBlock *insertInto = nullptr) + : m_func(func), m_insertInto(insertInto ? insertInto : &func->front()) {} // Get first insertion point in the function, after PC-getting code if already inserted. llvm::Instruction *getFirstInsertionPt(); @@ -74,6 +81,7 @@ class AddressExtender { llvm::Instruction *getPc(); llvm::Function *m_func; + llvm::BasicBlock *m_insertInto; llvm::Instruction *m_pc = nullptr; }; diff --git a/lgc/include/lgc/util/BufferResource.h b/lgc/include/lgc/util/BufferResource.h new file mode 100644 index 0000000000..90b5fc06b9 --- /dev/null +++ b/lgc/include/lgc/util/BufferResource.h @@ -0,0 +1,44 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file BufferResource.h + * @brief LLPC source file: contains implementation of LLPC internal-use utility functions. 
+ *********************************************************************************************************************** + */ + +#pragma once +#include "lgc/BuilderCommon.h" +#include "lgc/CommonDefs.h" +#include "lgc/state/TargetInfo.h" + +namespace lgc { +// Get the NumRecords from buffer resource descriptor +llvm::Value *getBufferNumRecords(const lgc::GfxIpVersion &gfxIpVer, lgc::BuilderCommon &builder, + llvm::Value *const bufferDesc); +// Get the stride from buffer resource descriptor +llvm::Value *getBufferStride(const lgc::GfxIpVersion &gfxIpVer, lgc::BuilderCommon &builder, + llvm::Value *const bufferDesc); +} // namespace lgc diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index fb7d6b34db..4300a92ff4 100644 --- a/lgc/interface/lgc/Builder.h +++ b/lgc/interface/lgc/Builder.h @@ -181,6 +181,7 @@ class BuilderDefs : public BuilderCommon { BufferFlagAttachedCounter = 128, // Flag to return the counter buffer descriptor attached to the main buffer. BufferFlagForceRawView = 256, // Flag to convert the buffer descriptor to raw view. BufferFlagCoherent = 512, // Coherent memory access + BufferFlagLLcNoAlloc = 1024, // Disable Mall cache }; // Get the type of a built-in -- static edition of the method below, so you can use it without a BuilderDefs object. diff --git a/lgc/interface/lgc/BuilderCommon.h b/lgc/interface/lgc/BuilderCommon.h index d850572511..ee2925dfd6 100644 --- a/lgc/interface/lgc/BuilderCommon.h +++ b/lgc/interface/lgc/BuilderCommon.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -31,15 +31,10 @@ #pragma once #include "llvm-dialects/Dialect/Builder.h" -#include "llvm/IR/IRBuilder.h" namespace lgc { enum class ResourceNodeType : unsigned; -enum class CooperativeMatrixMemoryAccess : unsigned; -enum class CooperativeMatrixElementType : unsigned; -enum class CooperativeMatrixLayout : unsigned; -enum class CooperativeMatrixArithOp : unsigned; // ===================================================================================================================== // BuilderCommon extends llvm_dialects::Builder, which extends llvm::IRBuilder<>, and provides a few utility methods @@ -110,22 +105,6 @@ class BuilderCommon : public llvm_dialects::Builder { // // @param instName : Name to give instruction(s) llvm::Instruction *CreateDebugBreak(const llvm::Twine &instName = ""); - - // ----------------------------------------------------------------------------------------------------------------- - // Cooperative matrix operation. - - // Convert the element type enum into the corresponding LLVM type. - llvm::Type *transCooperativeMatrixElementType(CooperativeMatrixElementType elemType); - - // Get the LGC type of a cooperative matrix with the given element type, layout and K size. - llvm::Type *getCooperativeMatrixTy(CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, - unsigned kSize = 16); - - // Whether the type of a cooperative matrix is specified bit width. - static bool isTypeNCooperativeMatrix(CooperativeMatrixElementType elemType, unsigned bitWidth); - - // Get the bit width of the cooperative matrix element. 
- static unsigned getBitWidthOfCooperativeMatrixElement(CooperativeMatrixElementType elemType); }; } // namespace lgc diff --git a/lgc/interface/lgc/Disassembler.h b/lgc/interface/lgc/Disassembler.h index 8eca22eefe..3ed4135589 100644 --- a/lgc/interface/lgc/Disassembler.h +++ b/lgc/interface/lgc/Disassembler.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -30,14 +30,24 @@ */ #pragma once +#include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" namespace lgc { -// Disassemble an object (typically ELF) into ostream. Does report_fatal_error on error. +// Disassemble an object (typically ELF) into ostream. // // @param data : The object file contents // @param ostream : The stream to disassemble into -void disassembleObject(llvm::MemoryBufferRef data, llvm::raw_ostream &ostream); +// @returns : Error::success() if no errors occurred, otherwise returns the Error object +llvm::Error disassembleObject(llvm::MemoryBufferRef data, llvm::raw_ostream &ostream); + +// Disassemble a single symbol within an object (typically ELF) into ostream. 
+// +// @param data : The object file contents +// @param ostream : The stream to disassemble into +// @param symbolName : symbol to disassemble +// @returns : Error::success() if no errors occurred, otherwise returns the Error object +llvm::Error disassembleSingleSymbol(llvm::MemoryBufferRef data, llvm::raw_ostream &ostream, llvm::StringRef symbolName); } // namespace lgc diff --git a/lgc/interface/lgc/LgcDialect.h b/lgc/interface/lgc/LgcDialect.h index 2ed87fea56..bb7339afd3 100644 --- a/lgc/interface/lgc/LgcDialect.h +++ b/lgc/interface/lgc/LgcDialect.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -30,60 +30,6 @@ */ #pragma once -namespace lgc { - -enum class CooperativeMatrixMemoryAccess : unsigned { - MemoryAccessMaskNone = 0x00, // No mask - MemoryAccessVolatileMask = 0x01, // Access memory in volatile - MemoryAccessCoherentMask = 0x02, // Access memory in coherent - MemoryAccessTemporalMask = 0x04, // Access memory in temporal -}; - -enum class CooperativeMatrixElementType : unsigned { - Unknown = 0, // Unknown - Float16, // 16-bit floating-point - Float32, // 32-bit floating-point - Int8, // 8-bit integer - Int16, // 16-bit integer - Int32, // 32 bit integer - Float16Packed, // packed 16-bit floating-point - BFloat16, // 16-bit brain floating-point - Float8, // 8-bit floating-point - BFloat8, // 8-bit brain floating-point - Int4, // 4-bit integer -}; - -// Layout is virtual concept, eg: 16bit and 32bit for matrixC will share the same layout initially. 
-// It will be passed as the argument of getTypeProperties to calculate the more detailed layout information. -enum class CooperativeMatrixLayout : unsigned { - FactorMatrixLayout = 0, // A/B layout on gfx10/gfx11 - AccumulatorMatrixLayout, // C/D layout on gfx11 - Gfx10AccumulatorMatrixLayout, // 32bit@C/D layout on gfx10 - Gfx10Accumulator16bitMatrixLayout, // 16bit@C/D layout on gfx10 - InvalidLayout -}; - -// The cooperative matrix arithmetic operations the builder can consume. -// NOTE: We rely on casting this implicitly to an integer, so we cannot use an enum class. -enum class CooperativeMatrixArithOp : unsigned { - IAdd = 0, - FAdd, - ISub, - FSub, - IMul, - FMul, - UDiv, - SDiv, - FDiv, - UMod, - SRem, - SMod, - FRem, - FMod -}; - -} // namespace lgc - #define GET_INCLUDES #define GET_DIALECT_DECLS #include "lgc/LgcDialect.h.inc" diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index 0ea4ceafc5..f85c2519e1 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -38,11 +38,6 @@ def TaskPayloadPointer : TgConstant<(PointerType 7)>, Type; def V4I32 : TgConstant<(FixedVectorType I32, 4)>, Type; -defm CooperativeMatrixMemoryAccess : AttrEnum<"CooperativeMatrixMemoryAccess">; -defm CooperativeMatrixElementType : AttrEnum<"CooperativeMatrixElementType">; -defm CooperativeMatrixLayout : AttrEnum<"CooperativeMatrixLayout">; -defm CooperativeMatrixArithOp : AttrEnum<"CooperativeMatrixArithOp">; - def NoDivergenceSource : LlvmEnumAttributeTrait<"NoDivergenceSource">; class DivergentLgcOp traits_ = []> @@ -230,6 +225,18 @@ def DebugPrintfOp : LgcOp<"debug.printf", [Memory<[(readwrite InaccessibleMem)]> }]; } +def AbortMsgOp : LgcOp<"abort.msg", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { + let arguments = (ins varargs:$args); + let results = (outs); + + let summary = "output abort messages"; + let description = [{ + Writes constant values to the metadata and variables to the debug printf buffer 
pointed to by `buffer`. + + Arguments must be of type `i32`. + }]; +} + def TaskPayloadPtrOp : LgcOp<"task.payload.ptr", [Memory<[]>, WillReturn]> { let arguments = (ins); let results = (outs TaskPayloadPointer:$payload); @@ -710,147 +717,6 @@ def SubgroupRotateOp : LgcOp<"subgroup.rotate", [NoUnwind, Convergent]> { }]; } -def CooperativeRowAccLoadOp : DivergentLgcOp<"cooperative.rowacc.load", [Memory<[(read)]>, WillReturn]> { - let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, CooperativeMatrixMemoryAccess:$memory_access); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "load cooperative rowacc from memory"; - let description = [{ - Load contiguous elements from the specified location of the memory. - - Return acc row data in finalized mode. - - 'pointer' is the pointer address to the data. - 'stride' is the stride in bytes in memory between the first elements in the source data. - 'elem_type' is the element type for the row acc. - - 'memory_access' is a set of flags describing the memory. - - Bit 0 is set if the memory is volatile - - Bit 1 is set if the memory is coherent - - Bit 2 is set if the memory is temporal. - }]; -} - -def CooperativeRowAccStoreOp : LgcOp<"cooperative.rowacc.store", [Memory<[(write)]>]> { - let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, value:$data, CooperativeMatrixMemoryAccess:$memory_access); - let results = (outs); - - let summary = "store cooperative rowacc to memory"; - let description = [{ - Store a contiguous elements from the specified location of the memory. - - 'pointer' is the pointer address to the data. - 'stride' is the stride in bytes in memory between the first elements in the source data. - 'elem_type' is the element type for the row acc. - 'data' is data of row acc, Must be in finalized mode. - - 'memory_access' is a set of flags describing the memory. 
- - Bit 0 is set if the memory is volatile - - Bit 1 is set if the memory is coherent - - Bit 2 is set if the memory is temporal. - }]; -} - -def CooperativeRowAccAccumulateModeOp : DivergentLgcOp<"cooperative.rowacc.accumulate.mode", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); - let results = (outs (eq $row_acc):$result); - - let summary = "change cooperative row acc date mode from finalize mode to accumulate mode"; - let description = [{ - convert the row acc data from finalize mode to accumulate mode. - - Return acc row data in accumulate mode. - - 'row_acc' is the input row acc data, must be in finalize mode. - 'elem_type' is the element type for the row acc. - }]; -} - -def CooperativeRowAccFinalizeModeOp : DivergentLgcOp<"cooperative.rowacc.finalize.mode", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); - let results = (outs (eq $row_acc):$result); - - let summary = "change cooperative rowacc date mode from accumulate state to finalize state"; - let description = [{ - convert the row accumulator data from accumulate mode to finalize mode. - - Return row accumulator data in finalized mode. - - 'row_acc' is the input row acc data, must be in accumulate mode. - 'elem_type' is the element type for the row acc. - }]; -} - -def CooperativeRowAccSplatOp : DivergentLgcOp<"cooperative.rowacc.splat", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$scalar, CooperativeMatrixElementType:$elem_type); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "fill cooperative rowacc will a scalar value"; - let description = [{ - Return filled cooperative acc row in finalize mode. - - 'scalar' is the scalar value for fill the cooperative row acc. - 'elem_type' is the element type for the cooperative row acc. 
- }]; -} - -def CooperativeRowAccSumAccumulateOp : DivergentLgcOp<"cooperative.rowacc.sum.accumulate", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$matrix, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, AttrI1:$is_signed); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "sum and accumuate columns of cooperative matrix value to cooperative row accumulator"; - let description = [{ - Return accumulated acc row data in accumulate mode. - - 'matrix' is the input cooperative matrix. Must be A/B matrix. - 'matrix_elem_type' is the element type for the cooperative matrix. - 'matrix_layout' is the layout for the cooperative matrix. - 'row_acc' is the input cooperative row acc, must be in accumulate mode. - 'row_acc_elem_type' is the element type for input cooperative row acc. - 'is_signed' indicate if row accumulator element type is considered signed or not. - }]; -} - -def CooperativeRowAccScalarOp : DivergentLgcOp<"cooperative.rowacc.scalar", [Memory<[]>, WillReturn]> { - let arguments = (ins CooperativeMatrixArithOp:$binop, value:$row_acc, CooperativeMatrixElementType:$elem_type, value:$scalar, AttrI1:$accumulate_mode); - let results = (outs (eq $row_acc):$result); - - let summary = "cooperative row accumulator scalar operation in accumulate or finalize mode"; - let description = [{ - Return the cooperative row accumulator data with same mode (accumulate or finalized) as input cooperative accumulator data. - - 'binop' is the cooperative matrix arithmetic operation. - 'row_acc' is the input cooperative row accumulator. - 'elem_type' is the element type for the cooperative row accumulator data. - 'scalar' is the scalar value for operation. - 'accumulate_mode' indicate if input and return accumulator data in accumulate or finalize mode. 
- }]; -} - -def CooperativeRowAccExpandOp : DivergentLgcOp<"cooperative.rowacc.expand", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, AttrI1:$col_major); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "expand cooperative row accumulator data to cooperative matrix, the input row accumulator data must be in finalize mode."; - let description = [{ - Return the cooperative matrix. - - 'row_acc' is the input cooperative row accumulator. - 'row_acc_elem_type' is the element type for the input cooperative row accumulator data. - 'matrix_elem_type' is the element type for the output cooperative matrix. - 'matrix_layout' is the element type for the output cooperative matrix. - 'col_major' indicate how to expand the cooperative row acculumlator data by row or col. - }]; -} - def LoadDriverTableEntryOp : LgcOp<"load.driver.table.entry", [Memory<[]>, WillReturn]> { let arguments = (ins AttrI32:$offset); let results = (outs value:$entry); @@ -875,323 +741,6 @@ def InvariantDecorationOp : LgcOp<"invariant.decoration", [WillReturn]> { }]; } -def CooperativeMatrixLengthOp : LgcOp<"cooperative.matrix.length", [Memory<[]>, WillReturn]> { - let arguments = (ins CooperativeMatrixLayout:$layout, AttrI32:$k_size); - let results = (outs I32:$result); - - let summary = "get the length for the cooperative matrix"; - let description = [{ - Get the "length" of a matrix of the given layout, i.e. the number of matrix components stored per lane. - - 'layout' is layout of cooperative matrix. - 'k_size' is the matrix K size. 
- }]; -} - -def CooperativeMatrixLoadOp : DivergentLgcOp<"cooperative.matrix.load", [Memory<[(read)]>, Convergent, WillReturn]> { - let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, CooperativeMatrixElementType:$elem_type, - CooperativeMatrixLayout:$layout, AttrI32:$memory_access, AttrI32:$alignment, AttrI32:$k_size); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "load the cooperative matrix elements per lane"; - let description = [{ - Load the elements of cooperative matrix per lane through a pointer. - - Return or vector containing all the elements of the cooperative matrix per lane. - - 'pointer' is the pointer address of the first element of the cooperative matrix stored in memory. - 'stride' is the stride in bytes in memory between the first elements in the source data. - 'col_major' is the order of the data loaded from memory, col-major or row-major. - 'elem_type' is the element type of the cooperative matrix. - 'layout' is the layout of the input cooperative matrix. - - 'memory_access' is a set of flags describing the memory. - - Bit 0 is set if the memory is volatile - - Bit 1 is set if the memory is coherent - - Bit 2 is set if the memory is temporal. - - 'alignment' is the alignment of this load operation. - 'k_size' is the matrix K size. - }]; -} - -def CooperativeMatrixStoreOp : LgcOp<"cooperative.matrix.store", [Memory<[(write)]>, Convergent]> { - let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, CooperativeMatrixElementType:$elem_type, - CooperativeMatrixLayout:$layout, AttrI32:$memory_access, AttrI32:$alignment, - value:$store_value, AttrI32:$k_size); - let results = (outs); - - let summary = "Store cooperative matrix elements per lane to the memory"; - let description = [{ - Store cooperative matrix elements per lane to the memory through the pointer. The elements should be converted to - or type. 
- - 'pointer' is the pointer address of the data array in memory. - 'stride' is the stride in bytes in memory between the first elements in the source data. - 'col_major' is the order of the data stored into memory, col-major or row-major. - 'elem_type' is the element type of the cooperative matrix. - 'layout' is the layout of the input cooperative matrix. - - 'memory_access' is a set of flags describing the memory. - - Bit 0 is set if the memory is volatile - - Bit 1 is set if the memory is coherent - - Bit 2 is set if the memory is temporal. - - 'alignment' is the alignment of this store operation. - 'store_value' is the elements of the cooperative matrix perlane typed in or to be stored in memory. - 'k_size' is the matrix K size. - }]; -} - -def CooperativeMatrixFillOp : DivergentLgcOp<"cooperative.matrix.fill", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$scalar, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout, AttrI32:$k_size); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Return a matrix filled with a scalar value"; - let description = [{ - Return a matrix whose elements are all equal to the given `scalar`. - - 'scalar' is the value to fill the cooperative matrix. - 'elem_type' is the element type for the cooperative matrix. - 'layout' is the layout of the input cooperative matrix. - 'k_size' is the matrix K size. - }]; -} - -def CooperativeMatrixExtractOp : DivergentLgcOp<"cooperative.matrix.extract", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$matrix, value:$index, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "return the element extracted from the cooperative matrix by index"; - let description = [{ - Returns the value at the given `index` in the input matrix. 
- - 'matrix' is the matrix from which to extract a component. - 'index' is the index to be extracted. - 'elem_type' is the element type for the cooperativ ematrix. - 'layout' is the layout of the input cooperative matrix. - }]; -} - -def CooperativeMatrixInsertOp : DivergentLgcOp<"cooperative.matrix.insert", [Memory<[]>, WillReturn]> { - let arguments = (ins value:$matrix, value:$insert_value, value:$index, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Insert the element into the cooperative matrix"; - let description = [{ - Insert the given `insert_value` at the given `index` into the input matrix and return the matrix. - - 'matrix' is the matrix from which to extract a component. - 'value' is the value to be inserted. - 'index' is the index to be inserted. - 'elem_type' is the element type for the cooperative matrix. - 'layout' is the layout of the input cooperative matrix. - }]; -} - -def CooperativeMatrixConvertOp : DivergentLgcOp<"cooperative.matrix.convert", [Memory<[(read)]>, Convergent, - WillReturn]> { - let arguments = (ins AttrI32:$cast_op, value:$source, CooperativeMatrixElementType:$src_elem_type, CooperativeMatrixElementType:$dst_elem_type, - CooperativeMatrixLayout:$src_layout, CooperativeMatrixLayout:$dst_layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Reshape the layout for cooperative matrix or cooperative matrix element-wise-conversion operation"; - let description = [{ - This operation will convert the input matrix into either a different layout or convert the elements into a different type and return the result. - - 'cast_op' is the conversion operation. 0 means reshape on cooperative matrix layout, other values are for element-wise-conversion. - 'source' is the source cooperative matrix. 
- 'src_elem_type' is the source cooperative matrix's element type. - 'dst_elem_type' is the destination cooperative matrix's element type. - 'src_layout' is the layout for source cooperative matrix. - 'dst_layout' is the layout for target cooperative matrix. - }]; -} - -def CooperativeMatrixTransposeOp : DivergentLgcOp<"cooperative.matrix.transpose", [Convergent, WillReturn]> { - let arguments = (ins value:$matrix, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Transpose the cooperative matrix in register level and change its layout"; - let description = [{ - This operation will transpose the input matrix and return the transposed matrix. - - 'matrix' is the original cooprative matrix for transposition. - 'elem_type' is the element type for the cooperative matrix. - 'layout' is the layout of the input cooperative matrix. - }]; -} - -def CooperativeMatrixBinaryOp : DivergentLgcOp<"cooperative.matrix.binary", [Convergent, WillReturn]> { - let arguments = (ins CooperativeMatrixArithOp:$arith_op, value:$lhs, value:$rhs, CooperativeMatrixElementType:$elem_type, - CooperativeMatrixLayout:$layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Cooperativematrix binary operation"; - let description = [{ - Perform a binary operation on two matrices and return the resulting matrix. - The two input matrices need to have the same layout and element type. - - 'arith_op' is the arithmetic operation. - 'lhs' is the first operation of cooperative matrix. - 'rhs' is the second operation of cooperative matrix. - 'elem_type' is the element type of cooperative matrix. - 'layout' is the layout of the input cooperative matrix. 
- }]; -} - -def CooperativeMatrixTimesScalarOp : DivergentLgcOp<"cooperative.matrix.times.scalar", [Convergent, WillReturn]> { - let arguments = (ins value:$matrix, value:$scalar, CooperativeMatrixElementType:$elem_type, - CooperativeMatrixLayout:$layout); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Compute Matrix x Scalar and return the resulting cooperative matrix"; - let description = [{ - Multiply all matrix elements in the input matrix by the given `scalar`. - - 'matrix' is the matrix operand for the operation. - 'scalar' is the scalar operand for the operation. - 'elem_type' is the element type for the cooperativematrix operand. - 'layout' is the layout for the cooperative matrix. - }]; -} - -def CooperativeMatrixMulAddOp : DivergentLgcOp<"cooperative.matrix.muladd", [Convergent, WillReturn]> { - let arguments = (ins value:$matrix_a, value:$matrix_b, value:$matrix_c, AttrI1:$is_signed_a, AttrI1:$is_signed_b, - AttrI1:$is_sat_or_opsel, AttrI1:$is_tied, CooperativeMatrixElementType:$matrix_a_elem_type, - CooperativeMatrixElementType:$matrix_b_elem_type, CooperativeMatrixElementType:$matrix_c_elem_type, CooperativeMatrixElementType:$matrix_d_elem_type, AttrI32:$k_multiplier); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Calculate `matrix_a` * `matrix_b` + `matrix_c`"; - let description = [{ - Multiply `matrix_a` by `matrix_b` and add `matrix_c`. The resulting matrix has the same type as `matrix_c`. - - 'matrix_a' is the factor cooperative matrix whose use is MatrixAKHR. - 'matrix_b' is the factor cooperative matrix whose use is MatrixBKHR. - 'matrix_c' is the accumulator cooperative matrix whose use is MatrixAccumulatorKHR. - 'is_signed_a' is the signess for matrix_a's element type. - 'is_signed_b' is the signess for matrix_b's element type. 
- - 'is_sat_or_opsel' is the saturatingAccumulation for calculation, - In the case of 16-bit floating point matrices, this bit acts as an opsel bit, - if it is set to false, we store the result in the lower half of - the registers. If it is true, we store it in the upper half. - - 'is_tied' is the flag of the output matrix has to be the same - as the input accumulator (i.e., D has to be C) - - '$matrix_a_elem_type' is the component type of the matrix A - '$matrix_b_elem_type' is the component type of the matrix B - '$matrix_c_elem_type' is the component type of the matrix C - '$matrix_d_elem_type' is the component type of the matrix D - '$k_multiplier' is the multiplier for the matrix K size. - }]; -} - -def CooperativeMatrixPackOp : DivergentLgcOp<"cooperative.matrix.pack", [Memory<[(read)]>, WillReturn]> { - let arguments = (ins value:$matrix_c_lo, value:$matrix_c_hi); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Cooperativematrix pack operation"; - let description = [{ - This is to pack two accumulator cooperativematrices and store in the same registers. - - 'matrix_c_lo' is the lower accumulator cooperative matrix to be packed. - 'matrix_c_hi' is the upper accumulator cooperative matrix to be packed. - }]; -} - -def CooperativeMatrixUnPackOp : DivergentLgcOp<"cooperative.matrix.unpack", [Memory<[(read)]>, WillReturn]> { - let arguments = (ins value:$packed_matrix, AttrI1:$get_upper_half); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "Restores an unpacked matrix from a packed accumulator"; - let description = [{ - Returns the unpacked matrix stored in either the upper or lower half of a packed accumulator. - - 'packed_matrix' is the packed Accumulator cooperative matrix. - - 'get_upper_half' is the flag of getting the upper half or lower half of the register. 
- - if it's true, it will unpack cooperative matrix stored in the upper half register. - - if it's false, it will unpack cooperative matrix stored in the lower half register. - }]; -} - -def SparsityIndexLoadOp : DivergentLgcOp<"sparsityindex.load", [Memory<[(read)]>, Convergent, WillReturn]> { - let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, AttrI32:$memory_access); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "load the sparsity index for the sparse cooperative matrix"; - let description = [{ - Load the sparsity index for sparse cooperative matrix A which will be used in sparseA * DenseB + DenseC - Return value which size is [unused_16bit | index_16bit] for wave32 or [unused_24bit | index_8bit] for wave64. - - 'pointer' is the pointer to the index data stored in memory. - 'stride' is to qualify how the index data is laid out in memory. It must be of scalar integer type. - 'col_major' is a constant instruction with 32-bit integer type whose value corresponds to a Sparsity Index Memory Layout. - - 'memory_access' is a set of flags describing the memory. - - Bit 0 is set if the memory is volatile - - Bit 1 is set if the memory is coherent - - Bit 2 is set if the memory is temporal. 
- }]; -} - -def SparseCooperativeMatrixMulAddOp : DivergentLgcOp<"sparseCooperativeMatrix.muladd", [Convergent, WillReturn]> { - let arguments = (ins value:$matrix_a, value:$sparse_index, value:$matrix_b, value:$matrix_c, AttrI1:$is_signed_a, AttrI1:$is_signed_b, - AttrI1:$is_sat, CooperativeMatrixElementType:$matrix_a_elem_type, - CooperativeMatrixElementType:$matrix_b_elem_type, CooperativeMatrixElementType:$matrix_c_elem_type, CooperativeMatrixElementType:$matrix_d_elem_type, AttrI32:$k_multiplier); - let results = (outs value:$result); - - let defaultBuilderHasExplicitResultType = true; - - let summary = "The muladd operation supported as sparseA * denseB + denseC"; - let description = [{ - Sparse linear-algebraic matrix multiply of A and B with structural sparsity - information taken from Index, followed by component-wise addition of C. - The semantics of the multiplication are defined by the sparsity format of Index. - - Only support sparseA*DenseB+DenseC now. - - 'matrix_a' is the factor cooperative matrix whose use is MatrixAKHR. - 'matrix_b' is the factor cooperative matrix whose use is MatrixBKHR. - 'sparse_index' is the sparsity index. - 'matrix_c' is the accumulator cooperative matrix whose use is MatrixCKHR. - 'is_signed_a' is the signess for matrixA's element type. - 'is_signed_b' is the signess for matrixB's element type. - 'is_sat' is the saturatingAccumulation for calculation, - '$matrix_a_elem_type' is the component type of the A matrix. - '$matrix_b_elem_type' is the component type of the B matrix. - '$matrix_c_elem_type' is the component type of the C matrix. - '$matrix_d_elem_type' is the component type of the D matrix. - '$k_multiplier' is the multiplier for the matrix K size. 
- }]; -} - def LoadTfeOp : LgcOp<"load.tfe", [Memory<[]>, WillReturn]> { let arguments = (ins (or BufferPointer, BufferStridedPointer):$pointer); let results = (outs value:$result); diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 183414ecb1..0ca7a91fa3 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -119,6 +119,13 @@ enum CpsFlag : unsigned { CpsFlagStackInGlobalMem = 1 << 0, // Put stack in global memory instead of scratch. }; +/// Enumerate llvm schedule strategy. +enum class LlvmScheduleStrategy : unsigned { + None = 0, + MaxMemoryClause = 1, // Maximize memory clause + MaxIlp = 2 // Maximize ILP +}; + // Value for shadowDescriptorTable pipeline option. static const unsigned ShadowDescriptorTableDisable = ~0U; @@ -129,7 +136,7 @@ static const char SampleShadingMetaName[] = "lgc.sample.shading"; // The front-end should zero-initialize a struct with "= {}" in case future changes add new fields. // Note: new fields must be added to the end of this structure to maintain test compatibility. union Options { - unsigned u32All[50]; + unsigned u32All[52]; struct { uint64_t hash[2]; // Pipeline hash to set in ELF PAL metadata unsigned includeDisassembly; // If set, the disassembly for all compiled shaders will be included @@ -161,7 +168,7 @@ union Options { unsigned reserved0f; // Reserved for future functionality unsigned useResourceBindingRange; // A resource node binding is the start of a range whose size is // sizeInDwords/stride. - unsigned optimizeTessFactor; // If set, we can determine either send HT_TessFactor message or write to TF buffer + unsigned optimizeTessFactor; // If set, we can determine either send HS_TESSFACTOR message or write to TF buffer // depending the values of tessellation factors. 
unsigned enableInterpModePatch; // Enable to do per-sample interpolation for nonperspective and smooth input unsigned pageMigrationEnabled; // Enable page migration @@ -195,7 +202,7 @@ union Options { bool enableExtendedRobustBufferAccess; // Enable the extended robust buffer access bool sampleMaskExportOverridesAlphaToCoverage; // Whether to use sample mask export overriding alpha to coverage bool disableSampleCoverageAdjust; // Disable the adjustment of sample coverage - bool forceFragColorDummyExport; // Force dummy export is added to fragment shader color export. + bool forceNullFsDummyExport; // Force dummy export to be added for null fragment shader unsigned reserved22; bool dynamicTopology; // Whether primitive topology is dynamic. bool robustBufferAccess; // Enable the core robust buffer access @@ -208,8 +215,9 @@ union Options { unsigned reserved24; bool checkRawBufferAccessDescStride; // Check descriptor stride to workaround an issue that a strided buffer desc is // used for a raw buffer access instruction. - + bool padBufferSizeToNextDword; // Vulkan only, set if the driver rounds the buffer size up to the next dword unsigned reserved26[2]; + bool reserved27; }; }; static_assert(sizeof(Options) == sizeof(Options::u32All)); @@ -230,7 +238,7 @@ struct ColorExportInfo { // Note: new fields must be added to the end of this structure to maintain test compatibility. // The front-end should zero-initialize this with "= {}" in case future changes add new fields. union ShaderOptions { - unsigned u32All[34]; + unsigned u32All[36]; struct { uint64_t hash[2]; // Shader hash to set in ELF PAL metadata unsigned trapPresent; // Indicates a trap handler will be present when this pipeline is executed, @@ -335,6 +343,9 @@ union ShaderOptions { /// Force underflow prevention for log and pow bool forceUnderflowPrevention; + + /// Choose llvm's instruction scheduling strategy.
+ LlvmScheduleStrategy scheduleStrategy; }; }; static_assert(sizeof(ShaderOptions) == sizeof(ShaderOptions::u32All)); diff --git a/lgc/lowering/AddLoopMetadata.cpp b/lgc/lowering/AddLoopMetadata.cpp index c102994043..7e7c19f2cb 100644 --- a/lgc/lowering/AddLoopMetadata.cpp +++ b/lgc/lowering/AddLoopMetadata.cpp @@ -104,7 +104,7 @@ PreservedAnalyses AddLoopMetadata::run(Loop &loop, LoopAnalysisManager &analysis LLVM_DEBUG(dbgs() << "Run the pass Add-Loop-Metadata\n"); - Function *func = loop.getHeader()->getFirstNonPHI()->getFunction(); + Function *func = loop.getHeader()->getParent(); PipelineState *mPipelineState = pipelineState; m_context = &loop.getHeader()->getContext(); diff --git a/lgc/lowering/CollectResourceUsage.cpp b/lgc/lowering/CollectResourceUsage.cpp index d8842ce63c..84be3fb783 100644 --- a/lgc/lowering/CollectResourceUsage.cpp +++ b/lgc/lowering/CollectResourceUsage.cpp @@ -2075,176 +2075,97 @@ void CollectResourceUsage::mapBuiltInToGenericInOut() { // Map built-in outputs to generic ones if (nextStage == ShaderStage::TessEval) { - const auto &nextBuiltInUsage = nextResUsage->builtInUsage.tes; - auto &nextInOutUsage = nextResUsage->inOutUsage; - // NOTE: For tessellation control shader, those built-in outputs that involve in output import have to // be mapped to generic ones even if they do not have corresponding built-in inputs used in next shader // stage. 
+ const auto &nextBuiltInUsage = nextResUsage->builtInUsage.tes; + if (nextBuiltInUsage.positionIn) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInPosition) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInPosition]; - inOutUsage.builtInOutputLocMap[BuiltInPosition] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + 1); + builtInUsage.tcs.position = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInPosition) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInPosition] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInPosition) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.position = false; } if (nextBuiltInUsage.pointSizeIn) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInPointSize) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInPointSize]; - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + 1); + builtInUsage.tcs.pointSize = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInPointSize) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInPointSize) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.pointSize = false; } if (nextBuiltInUsage.clipDistanceIn > 0) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInClipDistance) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInClipDistance]; - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + (nextBuiltInUsage.clipDistanceIn > 4 ? 
2u : 1u)); + builtInUsage.tcs.clipDistance = nextBuiltInUsage.clipDistanceIn; } else { - if (m_importedOutputBuiltIns.find(BuiltInClipDistance) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInClipDistance) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.clipDistance = 0; } if (nextBuiltInUsage.cullDistanceIn > 0) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInCullDistance) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInCullDistance]; - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + (nextBuiltInUsage.cullDistanceIn > 4 ? 2u : 1u)); + builtInUsage.tcs.cullDistance = nextBuiltInUsage.cullDistanceIn; } else { - if (m_importedOutputBuiltIns.find(BuiltInCullDistance) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInCullDistance) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.cullDistance = 0; } if (nextBuiltInUsage.layerIn) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInLayer) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInLayer]; - inOutUsage.builtInOutputLocMap[BuiltInLayer] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + 1); + builtInUsage.tcs.layer = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInLayer) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInLayer] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInLayer) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.layer = false; } if (nextBuiltInUsage.viewportIndexIn) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInViewportIndex) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = 
nextInOutUsage.builtInInputLocMap[BuiltInViewportIndex]; - inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] = mapLoc; - availOutMapLoc = std::max(availOutMapLoc, mapLoc + 1); + builtInUsage.tcs.viewportIndex = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInViewportIndex) != m_importedOutputBuiltIns.end()) - inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInViewportIndex) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.viewportIndex = false; } if (nextBuiltInUsage.tessLevelOuter) { - assert(nextInOutUsage.perPatchBuiltInInputLocMap.find(BuiltInTessLevelOuter) != - nextInOutUsage.perPatchBuiltInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.perPatchBuiltInInputLocMap[BuiltInTessLevelOuter]; - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] = mapLoc; - availPerPatchOutMapLoc = std::max(availPerPatchOutMapLoc, mapLoc + 1); + builtInUsage.tcs.tessLevelOuter = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInTessLevelOuter) != m_importedOutputBuiltIns.end()) - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] = InvalidValue; - else + if (m_importedOutputBuiltIns.find(BuiltInTessLevelOuter) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.tessLevelOuter = false; } if (nextBuiltInUsage.tessLevelInner) { - assert(nextInOutUsage.perPatchBuiltInInputLocMap.find(BuiltInTessLevelInner) != - nextInOutUsage.perPatchBuiltInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.perPatchBuiltInInputLocMap[BuiltInTessLevelInner]; - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] = mapLoc; - availPerPatchOutMapLoc = std::max(availPerPatchOutMapLoc, mapLoc + 1); + builtInUsage.tcs.tessLevelInner = true; } else { - if (m_importedOutputBuiltIns.find(BuiltInTessLevelInner) != m_importedOutputBuiltIns.end()) - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] = InvalidValue; - else + if 
(m_importedOutputBuiltIns.find(BuiltInTessLevelInner) == m_importedOutputBuiltIns.end()) builtInUsage.tcs.tessLevelInner = false; } + } - // Revisit built-in outputs and map those unmapped to generic ones - if (inOutUsage.builtInOutputLocMap.find(BuiltInPosition) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInPosition] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; - - if (inOutUsage.builtInOutputLocMap.find(BuiltInPointSize) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInPointSize] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc++; - - if (inOutUsage.builtInOutputLocMap.find(BuiltInClipDistance) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc++; - - if (inOutUsage.builtInOutputLocMap.find(BuiltInCullDistance) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; - - if (inOutUsage.builtInOutputLocMap.find(BuiltInLayer) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInLayer] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInLayer] = availOutMapLoc++; - - if (inOutUsage.builtInOutputLocMap.find(BuiltInViewportIndex) != inOutUsage.builtInOutputLocMap.end() && - inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] == InvalidValue) - inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] = availOutMapLoc++; - - if (inOutUsage.perPatchBuiltInOutputLocMap.find(BuiltInTessLevelOuter) != - inOutUsage.perPatchBuiltInOutputLocMap.end() && - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] == InvalidValue) - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] = availPerPatchOutMapLoc++; + if 
(builtInUsage.tcs.position) + inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; - if (inOutUsage.perPatchBuiltInOutputLocMap.find(BuiltInTessLevelInner) != - inOutUsage.perPatchBuiltInOutputLocMap.end() && - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] == InvalidValue) - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] = availPerPatchOutMapLoc++; - } else if (!nextStage) { - // TCS only - if (builtInUsage.tcs.position) - inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; + if (builtInUsage.tcs.pointSize) + inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc++; - if (builtInUsage.tcs.pointSize) - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc++; - - if (builtInUsage.tcs.clipDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc++; - if (builtInUsage.tcs.clipDistance > 4) - ++availOutMapLoc; - } + if (builtInUsage.tcs.clipDistance > 0) { + inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc++; + if (builtInUsage.tcs.clipDistance > 4) + ++availOutMapLoc; + } - if (builtInUsage.tcs.cullDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; - if (builtInUsage.tcs.cullDistance > 4) - ++availOutMapLoc; - } + if (builtInUsage.tcs.cullDistance > 0) { + inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; + if (builtInUsage.tcs.cullDistance > 4) + ++availOutMapLoc; + } - if (builtInUsage.tcs.layerIn) - inOutUsage.builtInOutputLocMap[BuiltInLayer] = availOutMapLoc++; + if (builtInUsage.tcs.layerIn) + inOutUsage.builtInOutputLocMap[BuiltInLayer] = availOutMapLoc++; - if (builtInUsage.tcs.viewportIndexIn) - inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] = availOutMapLoc++; + if (builtInUsage.tcs.viewportIndexIn) + inOutUsage.builtInOutputLocMap[BuiltInViewportIndex] = availOutMapLoc++; - if (builtInUsage.tcs.tessLevelOuter) - 
inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] = availPerPatchOutMapLoc++; + if (builtInUsage.tcs.tessLevelOuter) + inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelOuter] = availPerPatchOutMapLoc++; - if (builtInUsage.tcs.tessLevelInner) - inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] = availPerPatchOutMapLoc++; - } + if (builtInUsage.tcs.tessLevelInner) + inOutUsage.perPatchBuiltInOutputLocMap[BuiltInTessLevelInner] = availPerPatchOutMapLoc++; inOutUsage.inputMapLocCount = std::max(inOutUsage.inputMapLocCount, availInMapLoc); inOutUsage.outputMapLocCount = std::max(inOutUsage.outputMapLocCount, availOutMapLoc); @@ -2949,22 +2870,25 @@ void CollectResourceUsage::clearUnusedOutput() { auto &outputLocInfoMap = inOutUsage.outputLocInfoMap; if (nextStage) { // Collect the locations of TCS's imported outputs - DenseSet importOutputLocs; + DenseSet importedOutputLocs; if (m_shaderStage == ShaderStage::TessControl) { // Imported output calls - for (auto &outputImport : m_importedOutputCalls) { - unsigned loc = outputImport->getLocation(); - Value *const locOffset = outputImport->getLocOffset(); - Value *const compIdx = outputImport->getElemIdx(); - importOutputLocs.insert(loc); + for (auto &importedOutputCall : m_importedOutputCalls) { + unsigned loc = importedOutputCall->getLocation(); + Value *const locOffset = importedOutputCall->getLocOffset(); + Value *const compIdx = importedOutputCall->getElemIdx(); + importedOutputLocs.insert(loc); + // Location offset and component index are both constant if (isa(locOffset) && isa(compIdx)) { - loc += cast(locOffset)->getZExtValue(); - auto bitWidth = outputImport->getType()->getScalarSizeInBits(); - if (bitWidth == 64 && cast(compIdx)->getZExtValue() >= 2) { - // NOTE: For the addressing of .z/.w component of 64-bit vector/scalar, the count of - // occupied locations are two. 
- importOutputLocs.insert(loc + 1); + const unsigned constLocOffset = cast(locOffset)->getZExtValue(); + const unsigned constCompIdx = cast(compIdx)->getZExtValue(); + + loc += constLocOffset; + const auto &outputTy = importedOutputCall->getType(); + if (constCompIdx * outputTy->getScalarSizeInBits() + outputTy->getPrimitiveSizeInBits() > 128) { + // Access bits that are greater than 128 (vec4), need the next location. + importedOutputLocs.insert(loc + 1); } } } @@ -3003,12 +2927,13 @@ void CollectResourceUsage::clearUnusedOutput() { if (!isOutputXfb && !foundInNextStage) { // NOTE: If the output is used as an imported one in TCS, mark it as active to avoid its removal. - const bool isActiveLoc = m_shaderStage == ShaderStage::TessControl && importOutputLocs.count(origLoc) > 0; + const bool isActiveLoc = m_shaderStage == ShaderStage::TessControl && importedOutputLocs.count(origLoc) > 0; if (!isActiveLoc) unusedLocInfos.push_back(locInfoPair.first); } } } + // Remove those collected InOutLocationInfos for (auto &locInfo : unusedLocInfos) outputLocInfoMap.erase(locInfo); @@ -3024,12 +2949,13 @@ void CollectResourceUsage::clearUnusedOutput() { for (auto &locPair : perPatchOutputLocMap) { const unsigned loc = locPair.first; if (nextPerPatchInLocMap.find(loc) == nextPerPatchInLocMap.end()) { - if (importOutputLocs.find(loc) != importOutputLocs.end()) + if (importedOutputLocs.find(loc) != importedOutputLocs.end()) locPair.second = availPerPatchInMapLoc++; else unusedLocs.push_back(loc); } } + // Remove those collected locations for (auto loc : unusedLocs) perPatchOutputLocMap.erase(loc); @@ -3039,19 +2965,15 @@ void CollectResourceUsage::clearUnusedOutput() { if (m_shaderStage == ShaderStage::Mesh) { auto &perPrimitiveOutputLocMap = inOutUsage.perPrimitiveOutputLocMap; const auto &nextPerPrimitiveInLocMap = nextResUsage->inOutUsage.perPrimitiveInputLocMap; - unsigned availPerPrimitiveInMapLoc = nextResUsage->inOutUsage.perPrimitiveInputMapLocCount; // Collect locations of 
those outputs that are not used by next shader stage SmallVector unusedLocs; for (auto &locPair : perPrimitiveOutputLocMap) { const unsigned loc = locPair.first; - if (nextPerPrimitiveInLocMap.find(loc) == nextPerPrimitiveInLocMap.end()) { - if (importOutputLocs.find(loc) != importOutputLocs.end()) - locPair.second = availPerPrimitiveInMapLoc++; - else - unusedLocs.push_back(loc); - } + if (nextPerPrimitiveInLocMap.find(loc) == nextPerPrimitiveInLocMap.end()) + unusedLocs.push_back(loc); } + // Remove those collected locations for (auto loc : unusedLocs) perPrimitiveOutputLocMap.erase(loc); @@ -3210,6 +3132,9 @@ void CollectResourceUsage::updateOutputLocInfoMapWithUnpack() { inOutUsage.mesh.vertexOutputComponents[newLocation] = vertexOutputComponents[location]; } } + } else { + if (m_shaderStage == ShaderStage::Mesh) + inOutUsage.mesh.vertexOutputComponents.clear(); } // @@ -3321,6 +3246,9 @@ void CollectResourceUsage::updateOutputLocInfoMapWithUnpack() { inOutUsage.mesh.primitiveOutputComponents[newLocation] = primitiveOutputComponents[location]; } } + } else { + if (m_shaderStage == ShaderStage::Mesh) + inOutUsage.mesh.primitiveOutputComponents.clear(); } m_outputCalls.clear(); @@ -3394,7 +3322,7 @@ void CollectResourceUsage::updateOutputLocInfoMapWithPack() { return; assert(m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || - m_shaderStage == ShaderStage::Geometry); // Possible stages + m_shaderStage == ShaderStage::Geometry || m_shaderStage == ShaderStage::Mesh); // Possible stages auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage.value()); auto &nextStageInputLocInfoMap = m_pipelineState->getShaderResourceUsage(nextStage.value())->inOutUsage.inputLocInfoMap; diff --git a/lgc/lowering/CombineCooperativeMatrix.cpp b/lgc/lowering/CombineCooperativeMatrix.cpp index 7bf25fa024..6a83c542aa 100644 --- a/lgc/lowering/CombineCooperativeMatrix.cpp +++ b/lgc/lowering/CombineCooperativeMatrix.cpp @@ -27,15 +27,15 @@ * @file 
CombineCooperativeMatrix.cpp * @brief Pass and helpers for combining cooperative matrix operations. * - * This pass is the place for combining / optimizing high-level cooperative matrix ops (@lgc.cooperative.matrix.*). + * This pass is the place for combining / optimizing high-level cooperative matrix ops (@lgc.xdl.cooperative.matrix.*). * * In particular, this pass reduces the number of transpose and convert operations. *********************************************************************************************************************** */ #include "lgc/lowering/CombineCooperativeMatrix.h" -#include "lgc/Builder.h" +#include "xdl/util/ElementType.h" #include "lgc/LgcDialect.h" -#include "lgc/state/Defs.h" +#include "lgc/LgcXdlDialect.h" #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" #include "llvm-dialects/Dialect/Visitor.h" @@ -46,13 +46,10 @@ using namespace llvm; using namespace lgc; +using namespace lgc::xdl; namespace lgc { -class CooperativeMatrixConvertOp; -class CooperativeMatrixTransposeOp; -class CooperativeMatrixMulAddOp; - struct Shape { CooperativeMatrixElementType elementType; CooperativeMatrixLayout layout; @@ -455,7 +452,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { b.SetInsertPointPastAllocas(&m_function); } - Type *resultMatrixTy = b.getCooperativeMatrixTy(component.shape->elementType, component.shape->layout); + Type *resultMatrixTy = getCooperativeMatrixTy(b, component.shape->elementType, component.shape->layout); auto *transposed = b.create(resultMatrixTy, PoisonValue::get(input->getType()), component.shape->elementType, component.shape->layout); foldTo(input, transposed); @@ -484,7 +481,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { b.SetInsertPoint(def->getNextNode()); } - Type *resultMatrixTy = b.getCooperativeMatrixTy(component.shape->elementType, component.shape->layout); + Type *resultMatrixTy = getCooperativeMatrixTy(b, 
component.shape->elementType, component.shape->layout); transposed = b.create(resultMatrixTy, use->get(), component.shape->elementType, component.shape->layout); } @@ -573,7 +570,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { b.SetInsertPointPastAllocas(&m_function); } - Type *resultMatrixTy = b.getCooperativeMatrixTy(component.shape->elementType, *otherLayout); + Type *resultMatrixTy = getCooperativeMatrixTy(b, component.shape->elementType, *otherLayout); CooperativeMatrixConvertOp *convert = b.create( resultMatrixTy, (CastInst::CastOps)0, PoisonValue::get(input->getType()), component.shape->elementType, component.shape->elementType, component.shape->layout, *otherLayout); @@ -609,7 +606,7 @@ bool CooperativeMatrixCombiner::tryFoldComponentContaining(Value *start) { b.SetInsertPoint(def->getNextNode()); } - Type *resultMatrixTy = b.getCooperativeMatrixTy(component.shape->elementType, component.shape->layout); + Type *resultMatrixTy = getCooperativeMatrixTy(b, component.shape->elementType, component.shape->layout); relayouted = b.create(resultMatrixTy, (CastInst::CastOps)0, use->get(), component.shape->elementType, component.shape->elementType, *otherLayout, component.shape->layout); diff --git a/lgc/lowering/Continufy.cpp b/lgc/lowering/Continufy.cpp index 3de4cb76cb..cee7bc65ad 100644 --- a/lgc/lowering/Continufy.cpp +++ b/lgc/lowering/Continufy.cpp @@ -32,6 +32,7 @@ #include "lgc/lowering/Continufy.h" #include "compilerutils/CompilerUtils.h" +#include "llpc/GpurtEnums.h" #include "llvmraytracing/ContinuationsUtil.h" #include "lgc/Builder.h" #include "lgc/LgcCpsDialect.h" @@ -59,7 +60,7 @@ static Function *insertCpsArguments(Function &fn) { auto *fnTy = fn.getFunctionType(); argTys.append(fnTy->params().begin(), fnTy->params().end()); - auto *newFn = CompilerUtils::mutateFunctionArguments(fn, Type::getVoidTy(context), argTys, fn.getAttributes()); + auto *newFn = compilerutils::mutateFunctionArguments(fn, Type::getVoidTy(context), 
argTys, fn.getAttributes()); fn.replaceAllUsesWith(newFn); for (unsigned idx = 0; idx < fn.arg_size(); idx++) { @@ -81,7 +82,7 @@ static Function *insertCpsArguments(Function &fn) { static unsigned getReturnedLevels(int stage) { // Traversal will return to RGS or CHS/MISS. if (stage == -1) - return 1u << (unsigned)CpsLevel::RayGen | 1u << (unsigned)CpsLevel::ClosestHit_Miss_Callable; + return 1u << (unsigned)CpsSchedulingLevel::RayGen | 1u << (unsigned)CpsSchedulingLevel::ClosestHit_Miss_Callable; RtStage rtStage = static_cast(stage); switch (rtStage) { @@ -90,39 +91,39 @@ static unsigned getReturnedLevels(int stage) { case RtStage::ClosestHit: case RtStage::Miss: // Traversal - return (1u << (unsigned)CpsLevel::Traversal); + return (1u << (unsigned)CpsSchedulingLevel::Traversal); case RtStage::Callable: // CHS/Miss/Callable | RGS - return (1u << (unsigned)CpsLevel::ClosestHit_Miss_Callable | 1u << (unsigned)CpsLevel::RayGen); + return (1u << (unsigned)CpsSchedulingLevel::ClosestHit_Miss_Callable | 1u << (unsigned)CpsSchedulingLevel::RayGen); case RtStage::AnyHit: // IS | Traversal - return (1u << (unsigned)CpsLevel::Intersection | 1u << (unsigned)CpsLevel::Traversal); + return (1u << (unsigned)CpsSchedulingLevel::Intersection | 1u << (unsigned)CpsSchedulingLevel::Traversal); case RtStage::Intersection: // Traversal - return 1u << (unsigned)CpsLevel::Traversal; + return 1u << (unsigned)CpsSchedulingLevel::Traversal; default: llvm_unreachable("Unknown raytracing shader type."); } } -/// Return CPS level of the ray-tracing stage. -static CpsLevel getCpsLevelFromRtStage(int stage) { +/// Return CPS scheduling level of the ray-tracing stage. 
+static CpsSchedulingLevel getCpsLevelFromRtStage(int stage) { // Traversal if (stage == -1) - return CpsLevel::Traversal; + return CpsSchedulingLevel::Traversal; RtStage rtStage = static_cast(stage); switch (rtStage) { case RtStage::RayGeneration: - return CpsLevel::RayGen; + return CpsSchedulingLevel::RayGen; case RtStage::ClosestHit: case RtStage::Miss: case RtStage::Callable: - return CpsLevel::ClosestHit_Miss_Callable; + return CpsSchedulingLevel::ClosestHit_Miss_Callable; case RtStage::AnyHit: - return CpsLevel::AnyHit_CombinedIntersection_AnyHit; + return CpsSchedulingLevel::AnyHit_CombinedIntersection_AnyHit; case RtStage::Intersection: - return CpsLevel::Intersection; + return CpsSchedulingLevel::Intersection; default: llvm_unreachable("Unknown raytracing shader type."); } @@ -149,7 +150,7 @@ PreservedAnalyses Continufy::run(Module &module, ModuleAnalysisManager &analysis if (continufyStage) { fnPtr = insertCpsArguments(fn); currentRtStage = mdconst::extract(continufyStage->getOperand(0))->getSExtValue(); - CpsLevel level = getCpsLevelFromRtStage(currentRtStage.value()); + CpsSchedulingLevel level = getCpsLevelFromRtStage(currentRtStage.value()); setCpsFunctionLevel(*fnPtr, level); } @@ -168,7 +169,7 @@ PreservedAnalyses Continufy::run(Module &module, ModuleAnalysisManager &analysis builder.SetInsertPoint(&call); auto *continuationRef = builder.CreatePtrToInt(called, IntegerType::get(context, 32)); - CpsLevel calleeLevel = + CpsSchedulingLevel calleeLevel = getCpsLevelFromRtStage(mdconst::extract(calleeStage->getOperand(0))->getSExtValue()); continuationRef = builder.CreateOr(continuationRef, builder.getInt32((uint32_t)calleeLevel)); diff --git a/lgc/lowering/FragmentColorExport.cpp b/lgc/lowering/FragmentColorExport.cpp index d5df152db1..cf81cb9422 100644 --- a/lgc/lowering/FragmentColorExport.cpp +++ b/lgc/lowering/FragmentColorExport.cpp @@ -488,7 +488,8 @@ PreservedAnalyses LowerFragmentColorExport::run(Module &module, ModuleAnalysisMa } 
FragmentColorExport fragColorExport(m_pipelineState->getLgcContext()); - bool dummyExport = m_resUsage->builtInUsage.fs.discard || m_pipelineState->getOptions().forceFragColorDummyExport || + bool dummyExport = m_resUsage->builtInUsage.fs.discard || + m_pipelineState->getOptions().forceNullFsDummyExport && m_resUsage->inOutUsage.fs.isNullFs || m_pipelineState->getShaderModes()->getFragmentShaderMode().enablePops; FragmentColorExport::Key key = FragmentColorExport::computeKey(m_info, m_pipelineState); fragColorExport.generateExportInstructions(m_info, m_exportValues, dummyExport, m_pipelineState->getPalMetadata(), diff --git a/lgc/lowering/LgcLowering.cpp b/lgc/lowering/LgcLowering.cpp index 4a15c568b9..40727b4fd6 100644 --- a/lgc/lowering/LgcLowering.cpp +++ b/lgc/lowering/LgcLowering.cpp @@ -66,7 +66,7 @@ #include "lgc/lowering/StructurizeBuffers.h" #include "lgc/lowering/VertexFetch.h" -#if LLPC_BUILD_STRIX1 +#if LLPC_BUILD_STRIX1 || LLPC_BUILD_STRIX_HALO #include "lgc/lowering/WorkaroundDsSubdwordWrite.h" #endif #include "lgc/Debug.h" @@ -211,7 +211,7 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T passMgr.addPass(createModuleToFunctionPassAdaptor(LowerInvariantLoads())); passMgr.addPass(createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(AddLoopMetadata()))); -#if LLPC_BUILD_STRIX1 +#if LLPC_BUILD_STRIX1 || LLPC_BUILD_STRIX_HALO passMgr.addPass(WorkaroundDsSubdwordWrite()); #endif diff --git a/lgc/lowering/LowerBufferOperations.cpp b/lgc/lowering/LowerBufferOperations.cpp index 768dc452b7..7ae6953789 100644 --- a/lgc/lowering/LowerBufferOperations.cpp +++ b/lgc/lowering/LowerBufferOperations.cpp @@ -35,6 +35,7 @@ #include "lgc/state/IntrinsDefs.h" #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" +#include "lgc/util/BufferResource.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" @@ -46,7 +47,7 @@ #define DEBUG_TYPE 
"lgc-lower-buffer-operations" -using namespace CompilerUtils; +using namespace compilerutils; using namespace llvm; using namespace lgc; @@ -727,21 +728,27 @@ void BufferOpLowering::visitConvertToStridedBufferPointer(ConvertToStridedBuffer m_builder.SetInsertPoint(&convertToStrided); auto *oldDescriptor = values[0]; - - auto *currentDword1 = m_builder.CreateExtractElement(oldDescriptor, 1); + Value *newDescriptor = nullptr; auto *stride = m_builder.getInt32(convertToStrided.getStride()); - auto *newDword1 = m_builder.CreateAnd(currentDword1, ~0x3FFF0000); - newDword1 = m_builder.CreateOr(newDword1, m_builder.CreateShl(stride, 16)); - auto *newDescriptor = m_builder.CreateInsertElement(oldDescriptor, newDword1, 1); - - auto *currentNumRecords = m_builder.CreateExtractElement(newDescriptor, 2); - auto *newNumRecords = m_builder.CreateUDiv(currentNumRecords, stride); - newDescriptor = m_builder.CreateInsertElement(newDescriptor, newNumRecords, 2); - auto *currentDword3 = m_builder.CreateExtractElement(newDescriptor, 3); - currentDword3 = m_builder.CreateAnd(currentDword3, 0xCFFFFFFF); - currentDword3 = m_builder.CreateOr(currentDword3, 0x10000000); - newDescriptor = m_builder.CreateInsertElement(newDescriptor, currentDword3, 3); + if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 12) { + // Set stride[61:48] + auto *currentDword1 = m_builder.CreateExtractElement(oldDescriptor, 1); + auto *newDword1 = m_builder.CreateAnd(currentDword1, ~0x3FFF0000); + newDword1 = m_builder.CreateOr(newDword1, m_builder.CreateShl(stride, 16)); + newDescriptor = m_builder.CreateInsertElement(oldDescriptor, newDword1, 1); + // Set NumRecords[95:64] + auto *currentNumRecords = m_builder.CreateExtractElement(newDescriptor, 2); + auto *newNumRecords = m_builder.CreateUDiv(currentNumRecords, stride); + newDescriptor = m_builder.CreateInsertElement(newDescriptor, newNumRecords, 2); + // Set OOB[125:124] as 0b01, (total_offset + payload) > numRecord + auto *currentDword3 = 
m_builder.CreateExtractElement(newDescriptor, 3); + currentDword3 = m_builder.CreateAnd(currentDword3, 0xCFFFFFFF); + currentDword3 = m_builder.CreateOr(currentDword3, 0x10000000); + newDescriptor = m_builder.CreateInsertElement(newDescriptor, currentDword3, 3); + } else { + llvm_unreachable("Unsupported gfxip"); + } m_typeLowering.replaceInstruction(&convertToStrided, {newDescriptor, values[1], m_builder.getInt32(0), m_builder.getFalse(), @@ -812,9 +819,7 @@ void BufferOpLowering::visitBufferLoadDescToPtr(BufferLoadDescToPtrOp &loadDescT m_typeLowering.replaceInstruction(&loadDescToPtr, {descriptor, ConstantPointerNull::get(m_offsetType), m_builder.getFalse(), PoisonValue::get(m_builder.getInt32Ty())}); } else { - Value *index = m_builder.CreatePtrToInt(loadDescToPtr.getDescPtr(), m_builder.getInt64Ty()); - index = m_builder.CreateBitCast(index, FixedVectorType::get(m_builder.getInt32Ty(), 2)); - index = m_builder.CreateExtractElement(index, m_builder.getInt64(0)); + Value *index = m_builder.CreatePtrToInt(loadDescToPtr.getDescPtr(), m_builder.getInt32Ty()); m_typeLowering.replaceInstruction(&loadDescToPtr, {descriptor, ConstantPointerNull::get(m_offsetType), m_builder.getTrue(), index}); } @@ -847,7 +852,7 @@ void BufferOpLowering::visitStridedBufferLoadDescToPtr(StridedBufferLoadDescToPt {descriptor, ConstantPointerNull::get(m_offsetType), m_builder.getInt32(0), m_builder.getFalse(), PoisonValue::get(m_builder.getInt32Ty())}); } else { - Value *index = m_builder.CreateBitCast(loadDescToPtr.getDescPtr(), m_builder.getInt32Ty()); + Value *index = m_builder.CreatePtrToInt(loadDescToPtr.getDescPtr(), m_builder.getInt32Ty()); m_typeLowering.replaceInstruction(&loadDescToPtr, {descriptor, ConstantPointerNull::get(m_offsetType), m_builder.getInt32(0), m_builder.getTrue(), index}); } @@ -896,11 +901,9 @@ void BufferOpLowering::visitBufferLength(BufferLengthOp &length) { auto values = m_typeLowering.getValue(length.getPointer()); Value *const bufferDesc = values[0]; - 
Value *numRecords = nullptr; - { - // Extract element 2 which is the NUM_RECORDS field from the buffer descriptor. - numRecords = m_builder.CreateZExt(m_builder.CreateExtractElement(bufferDesc, 2), m_builder.getInt64Ty()); - } + Value *numRecords = getBufferNumRecords(m_pipelineState.getTargetInfo().getGfxIpVersion(), m_builder, bufferDesc); + if (numRecords->getType()->getIntegerBitWidth() == 32) + numRecords = m_builder.CreateZExt(numRecords, m_builder.getInt64Ty()); Value *offset = length.getOffset(); // If null descriptors are allowed, we must guarantee a 0 result for a null buffer descriptor. @@ -1534,14 +1537,10 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { const bool isStridedPointer = pointerOperand->getType()->getPointerAddressSpace() == ADDR_SPACE_BUFFER_STRIDED_POINTER; auto pointerValues = m_typeLowering.getValue(pointerOperand); - unsigned id = isStridedPointer ? 3 : 2; + Value *bufferDesc = pointerValues[0]; bool isIndexedDesc = false; - if (isa(pointerValues[id])) { - isIndexedDesc = cast(pointerValues[id])->isOne(); - if (isIndexedDesc) - bufferDesc = pointerValues[id + 1]; - } + unsigned isIndexedIdx = isStridedPointer ? 
3 : 2; const DataLayout &dataLayout = m_builder.GetInsertBlock()->getModule()->getDataLayout(); @@ -1701,22 +1700,13 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { accessSizeAllowed = accessSize >= 4; } -#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 458033 - // Old version of the code - const bool isDivergentPtr = m_uniformityInfo.isDivergent(*pointerOperand); -#else - // New version of the code (also handles unknown version, which we treat as latest) const bool isDivergentPtr = m_uniformityInfo.isDivergent(pointerOperand); -#endif - const bool haveNonStridedDescriptor = !isStridedPointer || m_stridedDescriptors.contains(bufferDesc); - const bool is32BitStridedBufferLoad = isStridedPointer && intAccessType->getScalarSizeInBits() == 32; + if (isInvariant && !isDivergentDesc && accessSizeAllowed && - (haveNonStridedDescriptor || is32BitStridedBufferLoad) && (!indexValue || isa(indexValue) || !isDivergentPtr)) { // create s.buffer.load Value *desc = bufferDesc; - if (isIndexedDesc) - desc = m_builder.CreateLoad(FixedVectorType::get(m_builder.getInt32Ty(), 4), bufferDesc); + assert(desc->getType()->isVectorTy()); if (isStridedPointer) { // Especially when the index is a constant, and the stride is known at compile-time, // we should create s_buffer_load instructions with constant offsets: index * stride + offset @@ -1724,10 +1714,7 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { if (m_stridedDescriptors.contains(desc)) { std::tie(desc, stride) = m_stridedDescriptors[desc]; } else { - Value *desc1 = m_builder.CreateExtractElement(desc, 1); - // stride is 61:48 bits in descriptor, which will always be constantInt when create BufferDesc - stride = - m_builder.CreateAnd(m_builder.CreateLShr(desc1, m_builder.getInt32(16)), m_builder.getInt32(0x3fff)); + stride = getBufferStride(m_pipelineState.getTargetInfo().getGfxIpVersion(), m_builder, desc); } Value *indexOffsetVal = m_builder.CreateMul(indexValue, stride); offsetVal = 
m_builder.CreateAdd(offsetVal, indexOffsetVal); @@ -1738,6 +1725,8 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { call->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(m_builder.getContext(), {})); part = call; } else { + if (isIndexedDesc) + bufferDesc = pointerValues[isIndexedIdx + 1]; if (indexValue) { Intrinsic::ID intrinsic = Intrinsic::amdgcn_struct_buffer_load; #if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 506212 @@ -1758,6 +1747,8 @@ Value *BufferOpLowering::replaceLoadStore(Instruction &inst) { } } else { // Store + if (isIndexedDesc) + bufferDesc = pointerValues[isIndexedIdx + 1]; unsigned compCount = accessSize / smallestByteSize; part = PoisonValue::get(FixedVectorType::get(smallestType, compCount)); @@ -1899,21 +1890,20 @@ Instruction *BufferOpLowering::makeLoop(Value *const loopStart, Value *const loo Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Value *const offset, Value *const strideIndex, Type *const type, Instruction &inst, const function_ref callback) { - // The 2nd element (NUM_RECORDS) in the buffer descriptor is byte bound. - Value *bound = m_builder.CreateExtractElement(bufferDesc, 2); + Value *bound = getBufferNumRecords(m_pipelineState.getTargetInfo().getGfxIpVersion(), m_builder, bufferDesc); Value *newOffset = offset; // index is for strided load which we need to handle the stride of the SRD. 
if (strideIndex || m_pipelineState.getOptions().checkRawBufferAccessDescStride) { - Value *desc1 = m_builder.CreateExtractElement(bufferDesc, 1); - Value *stride = - m_builder.CreateAnd(m_builder.CreateLShr(desc1, m_builder.getInt32(16)), m_builder.getInt32(0x3fff)); - Value *byteBound = m_builder.CreateMul(bound, stride); + Value *stride = getBufferStride(m_pipelineState.getTargetInfo().getGfxIpVersion(), m_builder, bufferDesc); + Value *byteBound = bound; + if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 12) + byteBound = m_builder.CreateMul(bound, stride); if (strideIndex) { bound = byteBound; newOffset = m_builder.CreateAdd(m_builder.CreateMul(strideIndex, stride), newOffset); - } else { + } else if (m_pipelineState.getTargetInfo().getGfxIpVersion().major <= 12) { // It is not a strided load, but it is possible that the application/client binds a strided descriptor so if // the stride is not zero, use bound in bytes to avoid wrong OOB check. stride = m_builder.CreateICmpNE(stride, m_builder.getInt32(0)); @@ -1921,6 +1911,9 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu } } + if (bound->getType()->getIntegerBitWidth() == 64) + newOffset = m_builder.CreateZExt(newOffset, m_builder.getInt64Ty()); + Value *inBound = m_builder.CreateICmpULT(newOffset, bound); // If null descriptor or extended robust buffer access is allowed, we will create a branch to perform normal global @@ -1933,7 +1926,7 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu Value *isNonNullDesc = m_builder.getTrue(); if (m_pipelineState.getOptions().allowNullDescriptor) { // Check dword2 against 0 for null descriptor - isNonNullDesc = m_builder.CreateICmpNE(bound, m_builder.getInt32(0)); + isNonNullDesc = m_builder.CreateICmpNE(bound, ConstantInt::get(bound->getType(), 0)); } Value *isInBound = m_pipelineState.getOptions().enableExtendedRobustBufferAccess ? 
inBound : m_builder.getTrue(); isValidAccess = m_builder.CreateAnd(isNonNullDesc, isInBound); @@ -1946,7 +1939,7 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu // NOTE: The offset of out-of-bound overridden as 0 may cause unexpected result when the extended robustness access // is disabled. if (!m_pipelineState.getOptions().enableExtendedRobustBufferAccess) - newOffset = m_builder.CreateSelect(inBound, newOffset, m_builder.getInt32(0)); + newOffset = m_builder.CreateSelect(inBound, newOffset, ConstantInt::get(newOffset->getType(), 0)); // Add on the index to the address. Value *pointer = m_builder.CreateGEP(m_builder.getInt8Ty(), baseAddr, newOffset); diff --git a/lgc/lowering/LowerCooperativeMatrix.cpp b/lgc/lowering/LowerCooperativeMatrix.cpp index 4bf88b1c36..9086d255c9 100644 --- a/lgc/lowering/LowerCooperativeMatrix.cpp +++ b/lgc/lowering/LowerCooperativeMatrix.cpp @@ -29,9 +29,11 @@ *********************************************************************************************************************** */ #include "lgc/lowering/LowerCooperativeMatrix.h" +#include "xdl/util/ElementType.h" #include "lgc/Builder.h" #include "lgc/LgcContext.h" #include "lgc/LgcDialect.h" +#include "lgc/LgcXdlDialect.h" #include "lgc/state/IntrinsDefs.h" #include "lgc/state/PipelineShaders.h" #include "lgc/state/PipelineState.h" @@ -45,6 +47,7 @@ using namespace llvm; using namespace lgc; +using namespace lgc::xdl; namespace lgc { @@ -211,10 +214,9 @@ LowerCooperativeMatrix::TypeProperties LowerCooperativeMatrix::getTypeProperties auto waveSize = m_pipelineState->getShaderWaveSize(m_shaderStage.value()); if (layout == CooperativeMatrixLayout::FactorMatrixLayout) { assert(elemType != CooperativeMatrixElementType::Float32 && elemType != CooperativeMatrixElementType::Int32); - props.numFlatElements = BuilderCommon::isTypeNCooperativeMatrix(elemType, 4) ? 8 : 16; + props.numFlatElements = isTypeNCooperativeMatrix(elemType, 4) ? 
8 : 16; } else if (layout == CooperativeMatrixLayout::AccumulatorMatrixLayout) { - if (BuilderCommon::isTypeNCooperativeMatrix(elemType, 16) && - (elemType != CooperativeMatrixElementType::Float16Packed)) { + if (isTypeNCooperativeMatrix(elemType, 16) && (elemType != CooperativeMatrixElementType::Float16Packed)) { props.matrixElementStride = 2; } if (elemType == CooperativeMatrixElementType::Float16Packed) { @@ -272,7 +274,7 @@ Value *LowerCooperativeMatrix::convCoopMatrixVecToFlatVec(BuilderCommon &builder CooperativeMatrixElementType elemType, CooperativeMatrixLayout layout, unsigned kSize) { auto props = getTypeProperties(elemType, layout, kSize); - Type *elemTy = builder.transCooperativeMatrixElementType(elemType); + Type *elemTy = transCooperativeMatrixElementType(builder, elemType); if (elemTy->getScalarSizeInBits() < 8) elemTy = builder.getInt8Ty(); Type *flatType = FixedVectorType::get(elemTy, props.numMatrixElements); @@ -412,7 +414,7 @@ void LowerCooperativeMatrix::visitCooperativeMatrixLoadOp(CooperativeMatrixLoadO unsigned kSize = load.getKSize(); // Calc element offset in memory - Type *elemTy = builder.transCooperativeMatrixElementType(elemType); + Type *elemTy = transCooperativeMatrixElementType(builder, elemType); if (elemType == CooperativeMatrixElementType::Int4) elemTy = builder.getInt8Ty(); const unsigned dataBitwidth = elemTy->getScalarSizeInBits(); @@ -514,7 +516,7 @@ void LowerCooperativeMatrix::visitCooperativeMatrixStoreOp(CooperativeMatrixStor assert(waveSize == 32 || waveSize == 64); // Calc element offset in memory - Type *elemTy = builder.transCooperativeMatrixElementType(elemType); + Type *elemTy = transCooperativeMatrixElementType(builder, elemType); if (elemType == CooperativeMatrixElementType::Int4) elemTy = builder.getInt8Ty(); @@ -603,7 +605,7 @@ void LowerCooperativeMatrix::visitCooperativeMatrixFillOp(CooperativeMatrixFillO Value *value = fill.getScalar(); unsigned kSize = fill.getKSize(); auto props = 
getTypeProperties(elemType, layout, kSize); - Type *flatType = FixedVectorType::get(builder.transCooperativeMatrixElementType(elemType), props.numMatrixElements); + Type *flatType = FixedVectorType::get(transCooperativeMatrixElementType(builder, elemType), props.numMatrixElements); Value *vec = PoisonValue::get(flatType); for (unsigned idx = 0; idx < props.numMatrixElements; idx++) @@ -699,7 +701,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixConvertInternal(CastInst::CastOp if (dstElemType == CooperativeMatrixElementType::BFloat16) dstType = FixedVectorType::get(builder.getBFloatTy(), vecSize); else - dstType = FixedVectorType::get(builder.transCooperativeMatrixElementType(dstElemType), vecSize); + dstType = FixedVectorType::get(transCooperativeMatrixElementType(builder, dstElemType), vecSize); if (srcElemType == CooperativeMatrixElementType::BFloat16) { assert(source->getType()->isIntOrIntVectorTy()); @@ -827,8 +829,8 @@ void LowerCooperativeMatrix::visitCooperativeMatrixConvertOp(CooperativeMatrixCo convert.getName(), &convert); } } else { - unsigned numSrcBit = builder.transCooperativeMatrixElementType(srcElemType)->getScalarSizeInBits(); - unsigned numDstBit = builder.transCooperativeMatrixElementType(dstElemType)->getScalarSizeInBits(); + unsigned numSrcBit = transCooperativeMatrixElementType(builder, srcElemType)->getScalarSizeInBits(); + unsigned numDstBit = transCooperativeMatrixElementType(builder, dstElemType)->getScalarSizeInBits(); // Step 1: Some cases need change the layout due to different element types before conversion. 
if ((numSrcBit < numDstBit) && (srcLayout != dstLayout)) { @@ -1656,24 +1658,25 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul unsigned factorFlatElemNum = 0; unsigned matrixLength = 0; - if (BuilderCommon::isTypeNCooperativeMatrix(matrixAType, 16)) { + if (isTypeNCooperativeMatrix(matrixAType, 16)) { assert(matrixAType == matrixBType); if (m_gfxIp.major <= 11) factorFlatElemNum = 16; Type *factorType = - FixedVectorType::get(builder.transCooperativeMatrixElementType(matrixAType), factorFlatElemNum); + FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixAType), factorFlatElemNum); matrixA = builder.CreateBitCast(matrixA, factorType); matrixB = builder.CreateBitCast(matrixB, factorType); - } else if (BuilderCommon::isTypeNCooperativeMatrix(matrixAType, 8)) { - } else if (!BuilderCommon::isTypeNCooperativeMatrix(matrixAType, 4)) { + } else if (isTypeNCooperativeMatrix(matrixAType, 8)) { + } else if (isTypeNCooperativeMatrix(matrixAType, 4)) { + } else { llvm_unreachable("Factor element type is not supported!"); } - if (BuilderCommon::isTypeNCooperativeMatrix(matrixCType, 32)) { + if (isTypeNCooperativeMatrix(matrixCType, 32)) { if (m_gfxIp.major <= 12) matrixC = waveSize == 64 ? builder.CreateShuffleVector(matrixC, ArrayRef({0, 1, 2, 3}), "shuffleVector") : matrixC; - } else if (BuilderCommon::isTypeNCooperativeMatrix(matrixCType, 16)) { + } else if (isTypeNCooperativeMatrix(matrixCType, 16)) { if (m_gfxIp.major == 12) { // When gfxIp.major > 12, waveSize will always be 32 then matrixC size is solid without any necessary swizzle. 
matrixC = @@ -1750,11 +1753,9 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul } matrixD = builder.CreateIntrinsic(retTy, intrinsic, args, nullptr, instName); - if (BuilderCommon::isTypeNCooperativeMatrix(matrixCType, 16)) { + if (isTypeNCooperativeMatrix(matrixCType, 16)) { unsigned coopVeclength = cast(matrixD->getType())->getNumElements(); - Type *wordTy = builder.transCooperativeMatrixElementType(matrixCType)->isIntOrIntVectorTy() - ? builder.getInt32Ty() - : builder.getFloatTy(); + Type *wordTy = isUnderlyingIntegerCooperativeMatrix(matrixCType) ? builder.getInt32Ty() : builder.getFloatTy(); matrixD = builder.CreateBitCast(matrixD, FixedVectorType::get(wordTy, coopVeclength / 2)); { matrixD = waveSize == 64 ? builder.CreateShuffleVector(matrixD, PoisonValue::get(matrixD->getType()), @@ -1809,7 +1810,8 @@ void LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul } } else if (matrixCType == CooperativeMatrixElementType::Int16 || matrixCType == CooperativeMatrixElementType::Float16) { - dotProductValue = PoisonValue::get(FixedVectorType::get(builder.transCooperativeMatrixElementType(matrixCType), 8)); + dotProductValue = + PoisonValue::get(FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixCType), 8)); // For gfx10, A*B:8*float32->16*half C: no reshape for 16bit, still 16*half Value *colData = convCoopMatrixVecToFlatVec(builder, matrixB, matrixAType, CooperativeMatrixLayout::FactorMatrixLayout); @@ -2159,11 +2161,10 @@ void LowerCooperativeMatrix::visitCooperativeRowAccLoadOp(CooperativeRowAccLoadO auto stride = load.getStride(); auto elemType = load.getElemType(); auto memoryAccess = load.getMemoryAccess(); - - assert(builder.transCooperativeMatrixElementType(elemType) == load.getType()); + Type *elemTy = transCooperativeMatrixElementType(builder, elemType); + assert(elemTy == load.getType()); // Calc element offset in memory - Type *elemTy = 
builder.transCooperativeMatrixElementType(elemType); const unsigned dataBitwidth = elemTy->getScalarSizeInBits(); const unsigned addrSpace = dataPtr->getType()->getPointerAddressSpace(); assert(addrSpace == ADDR_SPACE_LOCAL || addrSpace == ADDR_SPACE_BUFFER_FAT_POINTER || addrSpace == ADDR_SPACE_GLOBAL); @@ -2203,11 +2204,10 @@ void LowerCooperativeMatrix::visitCooperativeRowAccStoreOp(CooperativeRowAccStor auto elemType = store.getElemType(); auto memoryAccess = store.getMemoryAccess(); auto data = store.getData(); - - assert(builder.transCooperativeMatrixElementType(elemType) == data->getType()); + Type *elemTy = transCooperativeMatrixElementType(builder, elemType); + assert(elemTy == data->getType()); // Calc element offset in memory - Type *elemTy = builder.transCooperativeMatrixElementType(elemType); const unsigned dataBitwidth = elemTy->getScalarSizeInBits(); const unsigned addrSpace = dataPtr->getType()->getPointerAddressSpace(); assert(addrSpace == ADDR_SPACE_LOCAL || addrSpace == ADDR_SPACE_BUFFER_FAT_POINTER || addrSpace == ADDR_SPACE_GLOBAL); @@ -2244,7 +2244,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccAccumulateModeOp(CooperativeR Value *rowAccVal = accumulateMode.getRowAcc(); auto elemType = accumulateMode.getElemType(); - assert(builder.transCooperativeMatrixElementType(elemType) == accumulateMode.getType()); + assert(transCooperativeMatrixElementType(builder, elemType) == accumulateMode.getType()); assert(accumulateMode.getType() == rowAccVal->getType()); if (m_gfxIp.major >= 12) @@ -2265,7 +2265,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccFinalizeModeOp(CooperativeRow Value *rowAccVal = finalize.getRowAcc(); auto elemType = finalize.getElemType(); - assert(builder.transCooperativeMatrixElementType(elemType) == finalize.getType()); + assert(transCooperativeMatrixElementType(builder, elemType) == finalize.getType()); assert(finalize.getType() == rowAccVal->getType()); if (m_gfxIp.major >= 12) @@ -2285,7 +2285,7 @@ void 
LowerCooperativeMatrix::visitCooperativeRowAccSplatOp(CooperativeRowAccSpla Value *scalar = splat.getScalar(); - assert(builder.transCooperativeMatrixElementType(splat.getElemType()) == scalar->getType()); + assert(transCooperativeMatrixElementType(builder, splat.getElemType()) == scalar->getType()); splat.replaceAllUsesWith(scalar); m_coopRowAccCalls.push_back(&splat); @@ -2305,7 +2305,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccExpandOp(CooperativeRowAccExp auto matrixLayout = expand.getMatrixLayout(); auto colMajor = expand.getColMajor(); - assert(builder.getCooperativeMatrixTy(matrixElemType, matrixLayout) == expand.getType()); + assert(getCooperativeMatrixTy(builder, matrixElemType, matrixLayout) == expand.getType()); assert(rowAccElemType == CooperativeMatrixElementType::Float16 || rowAccElemType == CooperativeMatrixElementType::Float32 || rowAccElemType == CooperativeMatrixElementType::Int32); @@ -2326,7 +2326,7 @@ void LowerCooperativeMatrix::visitCooperativeRowAccExpandOp(CooperativeRowAccExp assert(matrixLayout == CooperativeMatrixLayout::AccumulatorMatrixLayout); auto props = getTypeProperties(matrixElemType, matrixLayout, 16); Type *flatType = - FixedVectorType::get(builder.transCooperativeMatrixElementType(matrixElemType), props.numFlatElements); + FixedVectorType::get(transCooperativeMatrixElementType(builder, matrixElemType), props.numFlatElements); Value *flatVec = PoisonValue::get(flatType); if (!colMajor) { @@ -2532,8 +2532,8 @@ void LowerCooperativeMatrix::visitCooperativeRowAccScalarOp(CooperativeRowAccSca auto coopMatArithOp = scalar.getBinop(); bool accumulateMode = scalar.getAccumulateMode(); - assert(builder.transCooperativeMatrixElementType(elemType) == rowAccVal->getType()); - assert(builder.transCooperativeMatrixElementType(elemType) == scalarVal->getType()); + assert(transCooperativeMatrixElementType(builder, elemType) == rowAccVal->getType()); + assert(transCooperativeMatrixElementType(builder, elemType) == 
scalarVal->getType()); bool needHandleAccumulateMode = accumulateMode && (m_gfxIp.major >= 12); diff --git a/lgc/lowering/LowerDebugPrintf.cpp b/lgc/lowering/LowerDebugPrintf.cpp index 89fdc97a3d..181bb158ca 100644 --- a/lgc/lowering/LowerDebugPrintf.cpp +++ b/lgc/lowering/LowerDebugPrintf.cpp @@ -48,6 +48,7 @@ using namespace llvm; using namespace lgc; constexpr unsigned PrintfBufferBindingId = 6; + namespace lgc { // ===================================================================================================================== @@ -144,14 +145,29 @@ void LowerDebugPrintf::visitDebugPrintf(DebugPrintfOp &op) { m_elfInfos[hashValue].formatString = strDebugStr; m_elfInfos[hashValue].bit64Pos = bit64Vector; + writeToDebugPrintfBuffer(header, debugPrintfBuffer, printArgs, builder); +} + +// ===================================================================================================================== +// Write the variable arguments to debug printf buffer +// @header : 64 bit header {[0:15], [16:63]} entrySize,hash value for the string +// @debugPrintfBuffer : debug printf buffer val +// @varData : the variable arguments need to be written to buffer +// @builder: builder to generate llvm +void LowerDebugPrintf::writeToDebugPrintfBuffer(uint64_t header, Value *debugPrintfBuffer, + SmallVectorImpl &varData, BuilderBase &builder) { uint32_t loEntryheader = uint32_t(header); uint32_t hiEntryheader = uint32_t(header >> 32); + static const uint32_t EntryHeaderSize = 2; // 2Dword EntrySize + stringId + uint32_t entrySize = EntryHeaderSize + varData.size(); + // uint64_t offset = AtomicAdd64(offsetPtr, entrySize); // maxOffset = 1 << 29; // corresponds to 2GiB // offset = offset < maxOffset ? 
offset : maxOffset; Value *entryOffset = builder.CreateAtomicRMW(AtomicRMWInst::Add, debugPrintfBuffer, builder.getInt64(entrySize), MaybeAlign(8), AtomicOrdering::Monotonic, SyncScope::System); + Value *maxOffset = builder.getInt64(1U << 29); entryOffset = builder.CreateBinaryIntrinsic(Intrinsic::umin, entryOffset, maxOffset); entryOffset = builder.CreateTrunc(entryOffset, builder.getInt32Ty()); @@ -160,9 +176,9 @@ void LowerDebugPrintf::visitDebugPrintf(DebugPrintfOp &op) { entryOffset = builder.CreateAdd(entryOffset, builder.getInt32(4)); SmallVector outputVals = {builder.getInt32(loEntryheader), builder.getInt32(hiEntryheader)}; - outputVals.reserve(printArgs.size() + 2); + outputVals.reserve(varData.size() + 2); // Prepare the dword sequence of printf output variables - for (auto printArg : printArgs) + for (auto printArg : varData) outputVals.push_back(printArg); // Write the payload to debug buffer @@ -254,25 +270,28 @@ void LowerDebugPrintf::setupElfsPrintfStrings() { auto printfStrings = document->getRoot().getMap(true)[Util::Abi::PalCodeObjectMetadataKey::PrintfStrings].getMap(true); printfStrings[".version"] = 1; - printfStrings[".user_data_offset"] = m_topNode->offsetInDwords; + printfStrings[".user_data_offset"] = m_topNode ? 
m_topNode->offsetInDwords : 0; auto formatStrings = printfStrings[".strings"].getArray(true); unsigned i = 0; for (auto it = m_elfInfos.begin(); it != m_elfInfos.end(); ++it, ++i) { auto arrayElems = formatStrings[i].getMap(true); arrayElems[Util::Abi::PipelineMetadataKey::Index] = it->first; - arrayElems[Util::Abi::PipelineMetadataKey::String] = it->second.formatString; - auto &bitVector = it->second.bit64Pos; - unsigned argsCount = bitVector.size(); - arrayElems[".argument_count"] = argsCount; - // Convert bit array to the 64bits array - unsigned bit64ArgsCount = (argsCount + 63) / 64; - SmallVector bitInDword64s(bit64ArgsCount, 0); - for (unsigned j = 0; j < argsCount; ++j) { - bitInDword64s[j / 64] |= (bitVector[j] << (j % 64)); + { + arrayElems[Util::Abi::PipelineMetadataKey::String] = it->second.formatString; + + auto &bitVector = it->second.bit64Pos; + unsigned argsCount = bitVector.size(); + arrayElems[".argument_count"] = argsCount; + // Convert bit array to the 64bits array + unsigned bit64ArgsCount = (argsCount + 63) / 64; + SmallVector bitInDword64s(bit64ArgsCount, 0); + for (unsigned j = 0; j < argsCount; ++j) { + bitInDword64s[j / 64] |= (bitVector[j] << (j % 64)); + } + auto bit64Args = arrayElems[".64bit_arguments"].getArray(true); + for (unsigned j = 0; j < bit64ArgsCount; ++j) + bit64Args[j] = bitInDword64s[j]; } - auto bit64Args = arrayElems[".64bit_arguments"].getArray(true); - for (unsigned j = 0; j < bit64ArgsCount; ++j) - bit64Args[j] = bitInDword64s[j]; } } diff --git a/lgc/lowering/LowerGpuRt.cpp b/lgc/lowering/LowerGpuRt.cpp index f18e602721..1556d4eae7 100644 --- a/lgc/lowering/LowerGpuRt.cpp +++ b/lgc/lowering/LowerGpuRt.cpp @@ -34,6 +34,7 @@ #include "lgc/LgcContext.h" #include "lgc/builder/BuilderImpl.h" #include "lgc/state/TargetInfo.h" +#include "lgc/util/AddressExtender.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -73,6 +74,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, 
ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitGetBoxSortHeuristicMode) .add(&LowerGpuRt::visitGetRayQueryDispatchId) .add(&LowerGpuRt::visitGetStaticFlags) + .add(&LowerGpuRt::visitMakePc) .add(&LowerGpuRt::visitGetTriangleCompressionMode) .add(&LowerGpuRt::visitGetFlattenedGroupThreadId) .add(&LowerGpuRt::visitFloatWithRoundMode) @@ -465,6 +467,43 @@ void LowerGpuRt::visitGetStaticFlags(GpurtGetStaticFlagsOp &inst) { m_funcsToLower.insert(inst.getCalledFunction()); } +// ===================================================================================================================== +// Visit "GpurtMakePcOp" instruction +// +// @param inst : The dialect instruction to process +// This generates the following IR for a call to @lgc.gpurt.make.pc(i32 %in32): +// clang-format off +// %pc64 = call i64 @llvm.amdgcn.s.getpc() +// %lshr = lshr i64 %pc64, 32 +// %high32 = trunc i64 %lshr to i32 +// %tmp1 = insertelement <2 x i32> poison, i32 %in32, i64 0 +// %tmp2 = insertelement <2 x i32> %tmp1, i32 %high32, i64 1 +// %tmp3 = bitcast <2 x i32> to i64 +// If the return type of the dialect op is an <2 x i32>, it bitcasts the result again: +// %bc = bitcast i64 %tmp3 to <2 x i32> +// clang-format on +// The original call is then replaced with either %tmp3 or %bc, dependent on the return type of the original dialect op. +void LowerGpuRt::visitMakePc(GpurtMakePcOp &inst) { + m_builder->SetInsertPoint(&inst); + + Value *highPc = m_builder->CreateIntrinsic(m_builder->getInt64Ty(), Intrinsic::amdgcn_s_getpc, {}); + highPc = m_builder->CreateTrunc(m_builder->CreateLShr(highPc, m_builder->getInt64(32)), m_builder->getInt32Ty()); + + BasicBlock *bb = inst.getParent(); + AddressExtender addressExtender(bb->getParent(), bb); + + Value *addr32 = inst.getVa(); + Value *replacement = nullptr; + replacement = addressExtender.extend(addr32, highPc, nullptr, *m_builder); + + // AddressExtender returns an i64 bitcast. Reconvert that to the vector return type if appropriate. 
+ replacement = m_builder->CreateBitCast(replacement, inst.getType()); + inst.replaceAllUsesWith(replacement); + + m_callsToLower.push_back(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + // ===================================================================================================================== // Visit "GpurtGetTriangleCompressionModeOp" instruction // diff --git a/lgc/lowering/LowerInOut.cpp b/lgc/lowering/LowerInOut.cpp index 2b3248d4c0..ec34a59bcc 100644 --- a/lgc/lowering/LowerInOut.cpp +++ b/lgc/lowering/LowerInOut.cpp @@ -294,8 +294,8 @@ void LowerInOut::processShader() { // Initialize HW configurations for tessellation shaders if (m_shaderStage == ShaderStage::TessControl || m_shaderStage == ShaderStage::TessEval) { - const auto stageMask = m_pipelineState->getShaderStageMask(); - const bool hasTcs = stageMask.contains(ShaderStage::TessControl); + const bool hasTcs = m_pipelineState->hasShaderStage(ShaderStage::TessControl); + const bool hasTes = m_pipelineState->hasShaderStage(ShaderStage::TessEval); auto &hwConfig = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->inOutUsage.tcs.hwConfig; if (!hwConfig.initialized) { @@ -320,9 +320,10 @@ void LowerInOut::processShader() { const auto &tcsInOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->inOutUsage; const auto &tesInOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->inOutUsage; - const unsigned inputLocCount = std::max(tcsInOutUsage.inputMapLocCount, 1u); - const unsigned outputLocCount = - hasTcs ? std::max(tcsInOutUsage.outputMapLocCount, 1u) : std::max(tesInOutUsage.inputMapLocCount, 1u); + const unsigned inputLocCount = std::max(tcsInOutUsage.inputMapLocCount, 1U); + const unsigned onChipOutputLocCount = std::max(tcsInOutUsage.outputMapLocCount, 1U); + const unsigned offChipOutputLocCount = + std::max(hasTes ? 
tesInOutUsage.inputMapLocCount : tcsInOutUsage.outputMapLocCount, 1U); const unsigned inputVertexCount = m_pipelineState->getNumPatchControlPoints(); const unsigned outputVertexCount = @@ -351,21 +352,23 @@ void LowerInOut::processShader() { hwConfig.onChip.inputVertexStride = (inputLocCount * 4) | 1; hwConfig.onChip.inputPatchSize = inputVertexCount * hwConfig.onChip.inputVertexStride; - hwConfig.onChip.outputVertexStride = (outputLocCount * 4) | 1; + hwConfig.onChip.outputVertexStride = (onChipOutputLocCount * 4) | 1; hwConfig.onChip.outputPatchSize = outputVertexCount * hwConfig.onChip.outputVertexStride; - hwConfig.offChip.outputVertexStride = outputLocCount * 4; + hwConfig.offChip.outputVertexStride = offChipOutputLocCount * 4; hwConfig.offChip.outputPatchSize = outputVertexCount * hwConfig.offChip.outputVertexStride; - const unsigned patchConstCount = - hasTcs ? tcsInOutUsage.perPatchOutputMapLocCount : tesInOutUsage.perPatchInputMapLocCount; + const unsigned onChipPatchConstCount = tcsInOutUsage.perPatchOutputMapLocCount; + const unsigned offChipPatchConstCount = + hasTes ? 
tesInOutUsage.perPatchInputMapLocCount : tcsInOutUsage.perPatchOutputMapLocCount; // Use odd-dword stride to avoid LDS bank conflict hwConfig.onChip.patchConstSize = 0; + if (onChipPatchConstCount > 0) + hwConfig.onChip.patchConstSize = (onChipPatchConstCount * 4) | 1; + hwConfig.offChip.patchConstSize = 0; - if (patchConstCount > 0) { - hwConfig.onChip.patchConstSize = (patchConstCount * 4) | 1; - hwConfig.offChip.patchConstSize = patchConstCount * 4; - } + if (offChipPatchConstCount > 0) + hwConfig.offChip.patchConstSize = offChipPatchConstCount * 4; const unsigned ldsSizePerPatch = hwConfig.onChip.outputPatchSize + hwConfig.onChip.patchConstSize + hwConfig.onChip.tessFactorStride + hwConfig.onChip.inputPatchSize; @@ -454,15 +457,16 @@ void LowerInOut::processShader() { LLPC_OUTS(" (HW TFs = " << tessFactorCount << " dwords)\n"); LLPC_OUTS("TF0/TF1 Messaging = " << (m_pipelineState->canOptimizeTessFactor() ? "true" : "false") << "\n"); LLPC_OUTS("\n"); - LLPC_OUTS("Tessellator Patch:\n"); + LLPC_OUTS("Tessellator Patch [OnChip, OffChip]:\n"); LLPC_OUTS("InputVertices = " << inputVertexCount << ", VertexStride = " << hwConfig.onChip.inputVertexStride << " dwords, Size = " << hwConfig.onChip.inputPatchSize << " dwords\n"); LLPC_OUTS("OutputVertices = " << outputVertexCount << ", VertexStride = [" << hwConfig.onChip.outputVertexStride << ", " << hwConfig.offChip.outputVertexStride << "] dwords, Size = [" << hwConfig.onChip.outputPatchSize << ", " << hwConfig.offChip.outputPatchSize << "] dwords\n"); - LLPC_OUTS("PatchConstants = " << patchConstCount << ", Size = [" << hwConfig.onChip.patchConstSize << ", " - << hwConfig.offChip.patchConstSize << "] dwords\n"); + LLPC_OUTS("PatchConstants = " + << "[" << onChipPatchConstCount << ", " << offChipPatchConstCount << "], Size = [" + << hwConfig.onChip.patchConstSize << ", " << hwConfig.offChip.patchConstSize << "] dwords\n"); LLPC_OUTS("\n"); LLPC_OUTS("Onchip LDS Layout (in dwords):\n"); @@ -2927,9 +2931,10 @@ Value 
*LowerInOut::patchTcsBuiltInOutputImport(Type *outputTy, unsigned builtInI if (outputTy->isArrayTy()) { // Handle the whole array + auto elemTy = outputTy->getArrayElementType(); for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) { - auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, builder.getInt32(i), nullptr, builder); - auto elem = readValueFromLds(false, builder.getFloatTy(), ldsOffset, builder); + auto ldsOffset = calcLdsOffsetForTcsOutput(elemTy, loc, nullptr, builder.getInt32(i), nullptr, builder); + auto elem = readValueFromLds(false, elemTy, ldsOffset, builder); output = builder.CreateInsertValue(output, elem, {i}); } } else { @@ -3187,8 +3192,9 @@ void LowerInOut::patchTcsBuiltInOutputExport(Value *output, unsigned builtInId, if (outputTy->isArrayTy()) { // Handle the whole array for (unsigned i = 0; i < outputTy->getArrayNumElements(); ++i) { - auto ldsOffset = calcLdsOffsetForTcsOutput(outputTy, loc, nullptr, builder.getInt32(i), nullptr, builder); auto elem = builder.CreateExtractValue(output, {i}); + auto ldsOffset = + calcLdsOffsetForTcsOutput(elem->getType(), loc, nullptr, builder.getInt32(i), nullptr, builder); writeValueToLds(false, elem, ldsOffset, builder); } } else { diff --git a/lgc/lowering/LowerPopsInterlock.cpp b/lgc/lowering/LowerPopsInterlock.cpp index d5f6cfe999..2c0709e93d 100644 --- a/lgc/lowering/LowerPopsInterlock.cpp +++ b/lgc/lowering/LowerPopsInterlock.cpp @@ -103,29 +103,85 @@ void LowerPopsInterlock::legalizeInterlock(FunctionAnalysisManager &funcAnalysis // // Legalize begin_interlock by doing two steps: - // 1. Find the closest common dominator of all begin_interlocks + // 1. Find the nearest common dominator of all begin_interlocks // 2. If that is in a cycle, go up the dominator tree until it is not in a cycle. 
// assert(!m_beginInterlocks.empty()); // Must have at least one begin_interlock - auto nearestDom = m_beginInterlocks.front(); - for (unsigned i = 1; i < m_beginInterlocks.size(); ++i) - nearestDom = domTree.findNearestCommonDominator(nearestDom, m_beginInterlocks[i]); + Instruction *nearestDom = nullptr; + for (auto beginInterlock : m_beginInterlocks) { + // Not initialized yet, take this begin_interlock as the initial value + if (!nearestDom) { + nearestDom = beginInterlock; + continue; + } + + nearestDom = domTree.findNearestCommonDominator(nearestDom, beginInterlock); + } if (auto cycle = cycleInfo.getCycle(nearestDom->getParent())) nearestDom = cycle->getCyclePredecessor()->getTerminator(); - m_builder->SetInsertPoint(nearestDom); - m_builder->create(); - // // Legalize end_interlock by doing two steps: - // 1. Find the closest common dominator of all end_interlocks + // 1. Find the nearest common post dominator of all end_interlocks // 2. If that is in a cycle, go down the dominator tree until it is not in a cycle. // assert(!m_endInterlocks.empty()); // Must have at least one end_interlock - auto nearestPostDom = m_endInterlocks.front(); - for (unsigned i = 1; i < m_endInterlocks.size(); ++i) { - const auto endInterlock = m_endInterlocks[i]; + + // NOTE: Here, we first find the return block. This is because we may fail to find the nearest common post dominator + // block of two end_interlocks. See such case: + // + // entry: + // ... + // begin_interlock + // end_interlock + // ... + // switch i, default [ + // 0, A + // 1, B + // ] + // + // A: + // ... + // br C + // + // B: + // ... + // br C + // + // default: + // unreachable + // + // C: + // ... + // begin_interlock + // end_interlock + // ... + // + // From execution of real shaders, all shader will exit from the sole return block. The unreachable block of the + // above case only has IR semantical meaning. 
Therefore, we can safely exclude those end_interlocks whose parent + // blocks are not post dominated by the return block. + BasicBlock *returnBlock = nullptr; + for (auto &block : *m_entryPoint) { + if (isa<ReturnInst>(block.getTerminator())) { + returnBlock = &block; + break; + } + } + assert(returnBlock); // Must find return block + + Instruction *nearestPostDom = nullptr; + for (auto endInterlock : m_endInterlocks) { + // Not post dominated by return block, exclude the end_interlock + if (!postDomTree.dominates(returnBlock, endInterlock->getParent())) + continue; + + // Not initialized yet, take this end_interlock as the initial value + if (!nearestPostDom) { + nearestPostDom = endInterlock; + continue; + } + if (endInterlock->getParent() == nearestPostDom->getParent()) { // In the same block, maybe update nearest post dominator if (nearestPostDom->comesBefore(endInterlock)) @@ -133,6 +189,7 @@ void LowerPopsInterlock::legalizeInterlock(FunctionAnalysisManager &funcAnalysis } else { auto nearestPostDomBlock = postDomTree.findNearestCommonDominator(nearestPostDom->getParent(), endInterlock->getParent()); + assert(nearestPostDomBlock); if (nearestPostDomBlock != nearestPostDom->getParent()) { // Block of the nearest post dominator is changed, have to update nearest post dominator if (nearestPostDomBlock == endInterlock->getParent()) { @@ -151,6 +208,13 @@ void LowerPopsInterlock::legalizeInterlock(FunctionAnalysisManager &funcAnalysis nearestPostDom = &*succBlocks[0]->getFirstInsertionPt(); }; + // Insert new begin_interlock after we find the nearest common dominator of all begin_interlocks. Likewise, insert + // new end_interlock after we find the nearest common post dominator of all end_interlocks.
+ assert(nearestDom); + m_builder->SetInsertPoint(nearestDom); + m_builder->create(); + + assert(nearestPostDom); m_builder->SetInsertPoint(nearestPostDom); m_builder->create(); diff --git a/lgc/lowering/LowerReadFirstLane.cpp b/lgc/lowering/LowerReadFirstLane.cpp index 7a553d804c..5153dcad8c 100644 --- a/lgc/lowering/LowerReadFirstLane.cpp +++ b/lgc/lowering/LowerReadFirstLane.cpp @@ -320,6 +320,13 @@ void ReadFirstLaneOptimizer::collectAssumeUniforms(BasicBlock *block, break; } + // Disallow uplifting readfirstlane across convergent operations which have cross-lane communication + auto call = dyn_cast(operandInst); + if (call && call->isConvergent()) { + cannotPropagate = true; + break; + } + operandInsts.push_back(operandInst); } @@ -339,12 +346,6 @@ void ReadFirstLaneOptimizer::collectAssumeUniforms(BasicBlock *block, while (!candidates.empty()) { Instruction *candidate = candidates.pop_back_val(); - if (auto intrinsic = dyn_cast(candidate)) { - // Don't lift readfirstlane that is manually added after permlane64 or permlanex16 in subgroupClusteredReduction - if (intrinsic->getIntrinsicID() == Intrinsic::amdgcn_permlane64 || - intrinsic->getIntrinsicID() == Intrinsic::amdgcn_permlanex16) - continue; - } if (isAllUsersAssumedUniform(candidate)) tryPropagate(candidate, false); } diff --git a/lgc/lowering/MeshTaskShader.cpp b/lgc/lowering/MeshTaskShader.cpp index ad01a337a7..5a41847b47 100644 --- a/lgc/lowering/MeshTaskShader.cpp +++ b/lgc/lowering/MeshTaskShader.cpp @@ -79,9 +79,9 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct // // 1. 
Internal mesh LDS: // - // +--------------+-----------------+--------------------+-------------------+-------------------+ - // | Vertex Count | Primitive Count | Barrier Completion | Flat Workgroup ID | Primitive Indices | >>> - // +--------------+-----------------+--------------------+-------------------+-------------------+ + // +--------------------+--------------------+-------------------+-------------------+ + // | Mesh Output Counts | Barrier Completion | Flat Workgroup ID | Primitive Indices | >>> + // +--------------------+--------------------+-------------------+-------------------+ // +----------------+-------------------+ // >>> | Vertex Outputs | Primitive Outputs | // +----------------+-------------------+ @@ -97,6 +97,12 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct assert(meshMode.outputVertices <= NggMaxThreadsPerSubgroup); assert(meshMode.outputPrimitives <= NggMaxThreadsPerSubgroup); + bool outputsToAllocas = meshOutputsToAllocas(pipelineState, entryPoint); + if (ldsLayout) { + assert(outputsLayout); + outputsLayout->outputsToAllocas = outputsToAllocas; + } + const auto resUsage = pipelineState->getShaderResourceUsage(ShaderStage::Mesh); const auto nextStage = pipelineState->getNextShaderStage(ShaderStage::Mesh); @@ -105,10 +111,10 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct unsigned ldsRegionSize = 0; auto printLdsRegionInfo = [=](const char *regionName, unsigned regionOffset, unsigned regionSize) { - LLPC_OUTS(format("%-40s : offset = 0x%04" PRIX32 ", size = 0x%04" PRIX32, regionName, regionOffset, regionSize)); if (regionSize == 0) - LLPC_OUTS(" (empty)"); - LLPC_OUTS("\n"); + return; + LLPC_OUTS( + format("%-30s : offset = 0x%04" PRIX32 ", size = 0x%04" PRIX32 "\n", regionName, regionOffset, regionSize)); }; auto printOutputLayoutInfo = [=](unsigned location, unsigned numComponents, unsigned relativeOffset, @@ -134,20 +140,13 @@ unsigned 
MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct LLPC_OUTS("// LLPC mesh shader LDS region info (in dwords) and general info\n\n"); } - // Vertex count - ldsRegionSize = 1; // A dword corresponds to vertex count (i32) + // Mesh output counts + ldsRegionSize = 2; // Two dwords correspond to vertex/primitive count (i32) if (ldsLayout) { - printLdsRegionInfo("Vertex Count", ldsOffsetInDwords, ldsRegionSize); - (*ldsLayout)[MeshLdsRegion::VertexCount] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); - ldsOffsetInDwords += ldsRegionSize; - } - meshLdsSizeInDwords += ldsRegionSize; - - // Primitive count - ldsRegionSize = 1; // A dword corresponds to primitive count (i32) - if (ldsLayout) { - printLdsRegionInfo("Primitive Count", ldsOffsetInDwords, ldsRegionSize); - (*ldsLayout)[MeshLdsRegion::PrimitiveCount] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); + // Make sure this region starts from zero offset in order to use 64-bit LDS access (8-byte alignment) later on. + assert(ldsOffsetInDwords == 0); + printLdsRegionInfo("Mesh Output Counts", ldsOffsetInDwords, ldsRegionSize); + (*ldsLayout)[MeshLdsRegion::MeshOutputCounts] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); ldsOffsetInDwords += ldsRegionSize; } meshLdsSizeInDwords += ldsRegionSize; @@ -173,7 +172,8 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct } // Primitive indices - ldsRegionSize = meshMode.outputPrimitives; // Each dword corresponds to primitive connectivity data (i32) + ldsRegionSize = + outputsToAllocas ? 
0 : meshMode.outputPrimitives; // Each dword corresponds to primitive connectivity data (i32) if (ldsLayout) { printLdsRegionInfo("Primitive Indices", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::PrimitiveIndices] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); @@ -181,17 +181,19 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct } meshLdsSizeInDwords += ldsRegionSize; - // Per-vertex outputs + // Vertex outputs auto &vertexOutputComponents = resUsage->inOutUsage.mesh.vertexOutputComponents; unsigned vertexStride = 0; - for (auto &vertexOutput : vertexOutputComponents) { - const auto numComponents = vertexOutput.second.first; - vertexStride += numComponents; // Calculate total number of components of vertex outputs + if (!outputsToAllocas) { + for (auto &vertexOutput : vertexOutputComponents) { + const auto numComponents = vertexOutput.second.first; + vertexStride += numComponents; // Calculate total number of components of vertex outputs + } } ldsRegionSize = vertexStride * meshMode.outputVertices; if (ldsLayout) { - printLdsRegionInfo("Per-vertex Output", ldsOffsetInDwords, ldsRegionSize); + printLdsRegionInfo("Vertex Outputs", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::VertexOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); assert(outputsLayout); @@ -205,8 +207,10 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct const auto location = vertexOutput.first; const auto &[numComponents, forBuiltIn] = vertexOutput.second; - outputsLayout->offsetsInVertex[location] = offsetInVertex; // Map output locations to relative offsets in vertex - offsetInVertex += numComponents; + if (!outputsToAllocas) { + outputsLayout->offsetsInVertex[location] = offsetInVertex; // Map output locations to relative offsets in vertex + offsetInVertex += numComponents; + } if (forBuiltIn == InvalidValue) { // Only consider vertex generic outputs, vertex built-ins will be 
handled later on @@ -287,17 +291,19 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct } meshLdsSizeInDwords += ldsRegionSize; - // Per-primitive outputs + // Primitive outputs auto &primitiveOutputComponents = resUsage->inOutUsage.mesh.primitiveOutputComponents; unsigned primitiveStride = 0; - for (auto &primitiveOutput : primitiveOutputComponents) { - const auto numComponents = primitiveOutput.second.first; - primitiveStride += numComponents; // Calculate total number of components of primitive outputs + if (!outputsToAllocas) { + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto numComponents = primitiveOutput.second.first; + primitiveStride += numComponents; // Calculate total number of components of primitive outputs + } } ldsRegionSize = primitiveStride * meshMode.outputPrimitives; if (ldsLayout) { - printLdsRegionInfo("Per-primitive Output", ldsOffsetInDwords, ldsRegionSize); + printLdsRegionInfo("Primitive Outputs", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::PrimitiveOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); assert(outputsLayout); @@ -322,9 +328,11 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct const auto location = primitiveOutput.first; const auto &[numComponents, forBuiltIn] = primitiveOutput.second; - outputsLayout->offsetsInPrimitive[location] = - offsetInPrimitive; // Map output locations to relative offsets in primitive - offsetInPrimitive += numComponents; + if (!outputsToAllocas) { + outputsLayout->offsetsInPrimitive[location] = + offsetInPrimitive; // Map output locations to relative offsets in primitive + offsetInPrimitive += numComponents; + } if (forBuiltIn == InvalidValue) { // Only consider primitive generic outputs, primitive built-ins will be handled later on @@ -440,14 +448,14 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct sharedVarLdsSizeInDwords += sizeInBytes / 4; } - // 
Setup internal mesh LDS - getOrCreateMeshLds(entryPoint->getParent(), meshLdsSizeInDwords); - if (ldsLayout) { + // Setup internal mesh LDS + getOrCreateMeshLds(entryPoint->getParent(), meshLdsSizeInDwords); + LLPC_OUTS("\n"); - printLdsRegionInfo("Internal Mesh LDS", 0, meshLdsSizeInDwords); - printLdsRegionInfo("Shared Variable LDS", 0, sharedVarLdsSizeInDwords); - printLdsRegionInfo("Total LDS", 0, meshLdsSizeInDwords + sharedVarLdsSizeInDwords); + LLPC_OUTS("Internal Mesh LDS = " << meshLdsSizeInDwords << " dwords\n"); + LLPC_OUTS("Shared Variable LDS = " << sharedVarLdsSizeInDwords << " dwords\n"); + LLPC_OUTS("Total LDS = " << meshLdsSizeInDwords + sharedVarLdsSizeInDwords << " dwords\n"); if (!outputsLayout->offsetsInVertex.empty()) { LLPC_OUTS("\nVertex Outputs Layout (stride = " << outputsLayout->vertexStride @@ -484,6 +492,8 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct } LLPC_OUTS("\n"); + LLPC_OUTS("RowExport = " << (usesRowExport(pipelineState) ? "true" : "false") << "\n"); + LLPC_OUTS("OutputsToAllocas = " << (outputsLayout->outputsToAllocas ? "true" : "false") << "\n"); LLPC_OUTS("Workgroup Size (X, Y, Z) = (" << meshMode.workgroupSizeX << ", " << meshMode.workgroupSizeY << ", " << meshMode.workgroupSizeZ << ")\n"); LLPC_OUTS("NumMeshThreads = " << meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ @@ -574,6 +584,101 @@ unsigned MeshTaskShader::useFlatWorkgroupId(PipelineState *pipelineState) { return builtInUsage.workgroupId || builtInUsage.globalInvocationId; } +// ===================================================================================================================== +// Check whether we actually use row export for mesh shader. +// +// @param pipelineState : Pipeline state +// @returns : Whether row export can be actually used. 
+bool MeshTaskShader::usesRowExport(PipelineState *pipelineState) { + if (!pipelineState->enableMeshRowExport()) + return false; // Not enabled + + const auto &meshMode = pipelineState->getShaderModes()->getMeshShaderMode(); + + const unsigned waveSize = pipelineState->getShaderWaveSize(ShaderStage::Mesh); + const unsigned numMeshThreads = meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ; + const unsigned numExportThreads = std::max(meshMode.outputPrimitives, meshMode.outputVertices); + + // If we have enough threads after HW GS amplification to export primitives/vertices, row export is actually not used. + if (alignTo(numExportThreads, waveSize) <= alignTo(numMeshThreads, waveSize)) + return false; + + return true; +} + +// ===================================================================================================================== +// Check whether mesh outputs can be written to allocas without through LDS. +// +// @param pipelineState : Pipeline state +// @param entryPoint : Entry-point of mesh shader +// @returns : Whether mesh outputs can be written to allocas +bool MeshTaskShader::meshOutputsToAllocas(PipelineState *pipelineState, Function *entryPoint) { + assert(getShaderStage(entryPoint) == ShaderStage::Mesh); + + const auto &meshMode = pipelineState->getShaderModes()->getMeshShaderMode(); + const bool linearDispatch = meshMode.workgroupSizeY == 1 && meshMode.workgroupSizeZ == 1; + + std::function fromLocalInvocationIndex = [&](Value *primOrVertexIndex) -> bool { + if (auto getMeshBuiltInInputOp = dyn_cast(primOrVertexIndex)) { + auto builtin = getMeshBuiltInInputOp->getBuiltin(); + if (builtin == BuiltInLocalInvocationIndex || builtin == BuiltInLocalInvocationId) { + // Use LocalInvocationIndex or LocalInvocationId + return true; + } + } else if (auto extractElement = dyn_cast(primOrVertexIndex)) { + if (linearDispatch) { + // Linear dispatch (X, Y=1, Z=1) + auto vectorOp = extractElement->getVectorOperand(); + auto 
constIndexOp = dyn_cast(extractElement->getIndexOperand()); + if (fromLocalInvocationIndex(vectorOp) && constIndexOp && constIndexOp->getZExtValue() == 0) { + // Use LocalInvocationID.X (equivalent to LocalInvocationIndex in linear dispatch) + return true; + } + } + } else if (auto freeze = dyn_cast(primOrVertexIndex)) { + return fromLocalInvocationIndex(freeze->getOperand(0)); + } + + return false; + }; + + IRBuilder<> builder(pipelineState->getContext()); + bool toAllocas = true; + + struct Payload { + IRBuilder<> &builder; + std::function fromLocalInvocationIndex; + bool &toAllocas; + }; + Payload payload = {builder, fromLocalInvocationIndex, toAllocas}; + + static const auto visitor = + llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](Payload &payload, SetMeshPrimitiveIndicesOp &setMeshPrimitiveIndicesOp) { + auto primitiveIndex = setMeshPrimitiveIndicesOp.getPrimitiveIndex(); + if (!payload.fromLocalInvocationIndex(primitiveIndex)) + payload.toAllocas = false; + }) + .add([](Payload &payload, SetMeshPrimitiveCulledOp &setMeshPrimitiveCulledOp) { + auto primitiveIndex = setMeshPrimitiveCulledOp.getPrimitiveIndex(); + if (!payload.fromLocalInvocationIndex(primitiveIndex)) + payload.toAllocas = false; + }) + .add([](Payload &payload, WriteMeshOutputOp &writeMeshOutputOp) { + auto locationOffset = writeMeshOutputOp.getLocationOffset(); + auto primOrVertexIndex = writeMeshOutputOp.getPrimOrVertexIndex(); + if (locationOffset != payload.builder.getInt32(0)) + payload.toAllocas = false; // Output array indexing + else if (!payload.fromLocalInvocationIndex(primOrVertexIndex)) + payload.toAllocas = false; + }) + .build(); + visitor.visit(payload, *entryPoint); + + return toAllocas; +} + // ===================================================================================================================== // Process task shader lowering.
// @@ -672,29 +777,36 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // Barrier // Read vertex/primitive count from LDS // - // if (vertexCount == ~0) { + // if (vertexCount == -1) { // if (threadIdInSubgroup == 0) // Send message GS_ALLOC_REQ (vertexCount = 0, primitiveCount = 0) // return // } // + // if (vertexCount == 0) + // return + // // if (primitiveIndex < primitiveCount) { // Read primitive connectivity data from LDS // Read primitive built-ins from LDS // Export primitive - // - // Read primitive attributes from LDS - // Export primitive attributes // } // // if (vertexIndex < vertexCount) { // Read vertex built-ins from LDS // Export vertex position data + // } // + // if (vertexIndex < vertexCount) { // Read vertex attributes from LDS // Export vertex attributes // } // + // if (primitiveIndex < primitiveCount) { + // Read primitive attributes from LDS + // Export primitive attributes + // } + // // if (threadIdInSubgroup == 0) // Write data to mesh pipeline statistics buffer // @@ -702,6 +814,8 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // } // + auto &hwConfig = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig; + // NOTE: We have to reset these two members since they might have stale values left by task shader processing. m_shaderRingEntryIndex = nullptr; m_payloadRingEntryOffset = nullptr; @@ -711,9 +825,15 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { auto &meshMode = m_pipelineState->getShaderModes()->getMeshShaderMode(); const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Mesh); + const bool rowExport = usesRowExport(m_pipelineState); + + // Setup LDS layout. We might shrink allocated LDS size if mesh outputs could be stored to allocas without LDS. 
+ const unsigned ldsSizeDwords = layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout, &m_outputsLayout); + + // Make sure we don't allocate more than what can legally be allocated by a single subgroup on the hardware. + assert(ldsSizeDwords <= m_pipelineState->getTargetInfo().getGpuProperty().gsOnChipMaxLdsSize); + hwConfig.gsOnChipLdsSize = ldsSizeDwords; - // Setup LDS layout - layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout, &m_outputsLayout); m_lds = getOrCreateMeshLds(entryPoint->getParent()); // Mutate mesh shader entry-point @@ -721,18 +841,21 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // Force s_barrier to be present if necessary (ignore optimization) const unsigned numMeshThreads = meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ; - auto primAmpFactor = - m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig.primAmpFactor; // If we enable row export, the actual thread group size is determined by work group size provided from API mesh // shader. - const unsigned flatWorkgroupSize = - alignTo(m_pipelineState->enableMeshRowExport() ? numMeshThreads : primAmpFactor, waveSize); + const unsigned flatWorkgroupSize = alignTo(rowExport ? numMeshThreads : hwConfig.primAmpFactor, waveSize); entryPoint->addFnAttr("amdgpu-flat-work-group-size", std::to_string(flatWorkgroupSize) + std::string(",") + std::to_string(flatWorkgroupSize)); const unsigned numWaves = flatWorkgroupSize / waveSize; const unsigned numMeshWaves = alignTo(numMeshThreads, waveSize) / waveSize; + const bool waAtmPrecedesPos = + m_gfxIp.major >= 11 ? 
m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos : false; + + const unsigned numVertexAttributes = m_outputsLayout.vertexExportCount; + const unsigned numPrimitiveAttributes = m_outputsLayout.primitiveExportCount; + // API mesh shader entry block BasicBlock *apiMeshEntryBlock = &entryPoint->getEntryBlock(); apiMeshEntryBlock->setName(".apiMeshEntry"); @@ -771,20 +894,38 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { auto checkDummyAllocReqBlock = createBlock(".checkDummyAllocReq"); auto dummyAllocReqBlock = createBlock(".dummyAllocReq"); auto endDummyAllocReqBlock = createBlock(".endDummyAllocReq"); - auto checkExportPrimitiveBlock = createBlock(".checkExportPrimitive"); + + auto checkNoExportBlock = createBlock(".checkNoExport"); + auto skipExportBlock = createBlock(".skipExport"); auto exportPrimitiveHeaderBlock = createBlock(".exportPrimitiveHeader"); auto exportPrimitiveBodyBlock = createBlock(".exportPrimitiveBody"); auto endExportPrimitiveBlock = createBlock(".endExportPrimitive"); - auto exportVertexHeaderBlock = createBlock(".exportVertexHeader"); - auto exportVertexBodyBlock = createBlock(".exportVertexBody"); - auto endExportVertexBlock = createBlock(".endExportVertex"); + auto exportPositionHeaderBlock = createBlock(".exportPositionHeader"); + auto exportPositionBodyBlock = createBlock(".exportPositionBody"); + auto endExportPositionBlock = createBlock(".endExportPosition"); + + auto exportVertexAttributeHeaderBlock = createBlock(".exportVertexAttributeHeader"); + auto exportVertexAttributeBodyBlock = createBlock(".exportVertexAttributeBody"); + auto endExportVertexAttributeBlock = createBlock(".endExportVertexAttribute"); + + auto exportPrimitiveAttributeHeaderBlock = createBlock(".exportPrimitiveAttributeHeader"); + auto exportPrimitiveAttributeBodyBlock = createBlock(".exportPrimitiveAttributeBody"); + auto endExportPrimitiveAttributeBlock = createBlock(".endExportPrimitiveAttribute"); + + if 
(waAtmPrecedesPos) { + // Move position export blocks after attribute export blocks if ATM-precedes-pos workaround is required. + exportPositionHeaderBlock->moveAfter(endExportPrimitiveAttributeBlock); + exportPositionBodyBlock->moveAfter(exportPositionHeaderBlock); + endExportPositionBlock->moveAfter(exportPositionBodyBlock); + } auto collectMeshStatsBlock = createBlock(".collectMeshStats"); auto exitBlock = createBlock(".exit"); // Construct ".entry" block + Value *firstThreadInSubgroup = nullptr; { m_builder.SetInsertPoint(entryBlock); @@ -807,66 +948,91 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { if (m_gfxIp.major >= 11) prepareAttribRingAccess(); - m_builder.CreateBr(initPrimitiveIndicesHeaderBlock); + if (m_outputsLayout.outputsToAllocas) { + firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); + m_builder.CreateCondBr(firstThreadInSubgroup, writeSpecialValueBlock, endWriteSpecialValueBlock); + } else { + m_builder.CreateBr(initPrimitiveIndicesHeaderBlock); + } } - // Construct ".initPrimitiveIndicesHeader" block PHINode *loopIndexPhi = nullptr; - { - m_builder.SetInsertPoint(initPrimitiveIndicesHeaderBlock); + if (m_outputsLayout.outputsToAllocas) { + // NOTE: If we can store mesh outputs to allocas, there is no need of initializing primitive indices in LDS. 
- if (m_pipelineState->enableMeshRowExport()) { - loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); - loopIndexPhi->addIncoming(m_builder.getInt32(0), entryBlock); // loopIndex = 0 + // Mark ".initPrimitiveIndicesHeader" block as unused + { + m_builder.SetInsertPoint(initPrimitiveIndicesHeaderBlock); + m_builder.CreateUnreachable(); + } - // primitiveIndex = threadIdInSubgroup + loopIndex * waveSize - m_waveThreadInfo.primOrVertexIndex = - m_builder.CreateAdd(m_waveThreadInfo.threadIdInSubgroup, - m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "primitiveIndex"); + // Mark ".initPrimitiveIndicesBody" block as unused + { + m_builder.SetInsertPoint(initPrimitiveIndicesBodyBlock); + m_builder.CreateUnreachable(); } - auto validPrimitive = - m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(meshMode.outputPrimitives)); - m_builder.CreateCondBr(validPrimitive, initPrimitiveIndicesBodyBlock, endInitPrimitiveIndicesBlock); - } + // Mark ".endInitPrimitiveIndices" block as unused + { + m_builder.SetInsertPoint(endInitPrimitiveIndicesBlock); + m_builder.CreateUnreachable(); + } + } else { + // Construct ".initPrimitiveIndicesHeader" block + { + m_builder.SetInsertPoint(initPrimitiveIndicesHeaderBlock); - // Construct ".initPrimitiveIndicesBody" block - { - m_builder.SetInsertPoint(initPrimitiveIndicesBodyBlock); + if (rowExport) { + loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); + loopIndexPhi->addIncoming(m_builder.getInt32(0), entryBlock); // loopIndex = 0 - if (m_pipelineState->enableMeshRowExport()) { - // - // Row export is something like this: - // - // loopIndex = 0 - // primitiveIndex = threadIdInSubgroup - // - // while (primitiveIndex < outputPrimitives) { - // Zero primitive connectivity data - // - // loopIndex += numWaves - // primitiveIndex += loopIndex * waveSize - // } - // - auto loopIndex = m_builder.CreateAdd(loopIndexPhi, m_builder.getInt32(numWaves)); // loopIndex += numWaves - 
loopIndexPhi->addIncoming(loopIndex, initPrimitiveIndicesBodyBlock); + // primitiveIndex = threadIdInSubgroup + loopIndex * waveSize + m_waveThreadInfo.primOrVertexIndex = + m_builder.CreateAdd(m_waveThreadInfo.threadIdInSubgroup, + m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "primitiveIndex"); + } + + auto validPrimitive = + m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(meshMode.outputPrimitives)); + m_builder.CreateCondBr(validPrimitive, initPrimitiveIndicesBodyBlock, endInitPrimitiveIndicesBlock); } - auto ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); - auto ldsOffset = m_builder.CreateAdd(ldsStart, m_waveThreadInfo.primOrVertexIndex); + // Construct ".initPrimitiveIndicesBody" block + { + m_builder.SetInsertPoint(initPrimitiveIndicesBodyBlock); + + if (rowExport) { + // + // Row export is something like this: + // + // loopIndex = 0 + // primitiveIndex = threadIdInSubgroup + // + // while (primitiveIndex < outputPrimitives) { + // Zero primitive connectivity data + // + // loopIndex += numWaves + // primitiveIndex += loopIndex * waveSize + // } + // + auto loopIndex = m_builder.CreateAdd(loopIndexPhi, m_builder.getInt32(numWaves)); // loopIndex += numWaves + loopIndexPhi->addIncoming(loopIndex, initPrimitiveIndicesBodyBlock); + } - writeValueToLds(m_builder.getInt32(0), ldsOffset); - m_builder.CreateBr(m_pipelineState->enableMeshRowExport() ? initPrimitiveIndicesHeaderBlock - : endInitPrimitiveIndicesBlock); - } + auto ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); + auto ldsOffset = m_builder.CreateAdd(ldsStart, m_waveThreadInfo.primOrVertexIndex); - // Construct ".endInitPrimitiveIndices" block - Value *firstThreadInSubgroup = nullptr; - { - m_builder.SetInsertPoint(endInitPrimitiveIndicesBlock); + writeValueToLds(m_builder.getInt32(0), ldsOffset); + m_builder.CreateBr(rowExport ? 
initPrimitiveIndicesHeaderBlock : endInitPrimitiveIndicesBlock); + } - firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); - m_builder.CreateCondBr(firstThreadInSubgroup, writeSpecialValueBlock, endWriteSpecialValueBlock); + // Construct ".endInitPrimitiveIndices" block + { + m_builder.SetInsertPoint(endInitPrimitiveIndicesBlock); + + firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); + m_builder.CreateCondBr(firstThreadInSubgroup, writeSpecialValueBlock, endWriteSpecialValueBlock); + } } // Construct ".writeSpecialValue" block @@ -876,7 +1042,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // NOTE: We write invalid value (~0) to vertex count as the sentinel. If API mesh shader executes // SetMeshOutputs, the value will be changed to a valid one. Otherwise, we know SetMeshOutputs is not be // executed and we must make a dummy sendmsg (GS_ALLOC_REQ) with zero vertex/primitive count. - auto ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexCount)); + auto ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::MeshOutputCounts)); writeValueToLds(m_builder.getInt32(InvalidValue), ldsOffset); // Write barrier completion flag to LDS if it is required. Otherwise, skip it. @@ -900,7 +1066,10 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { { m_builder.SetInsertPoint(endWriteSpecialValueBlock); - createFenceAndBarrier(); + // NOTE: This barrier is for initialization of primitive indices in LDS, writing barrier completion flag to LDS, or + // writing flat workgroup ID to LDS. If all cases are not encountered, this barrier is not needed. 
+ if (!m_outputsLayout.outputsToAllocas || m_needBarrierFlag || useFlatWorkgroupId(m_pipelineState)) + createFenceAndBarrier(); auto validMeshWave = m_builder.CreateICmpULT(m_waveThreadInfo.waveIdInSubgroup, m_builder.getInt32(numMeshWaves)); // There could be no extra waves @@ -986,20 +1155,23 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { createFenceAndBarrier(); - Value *ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexCount)); - vertexCount = readValueFromLds(m_builder.getInt32Ty(), ldsOffset); + Value *ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::MeshOutputCounts)); + auto meshOutputCounts = readValueFromLds(m_builder.getInt64Ty(), ldsOffset, sizeof(uint64_t)); + meshOutputCounts = + m_builder.CreateBitCast(meshOutputCounts, FixedVectorType::get(m_builder.getInt32Ty(), 2), "meshOutputCounts"); + + vertexCount = m_builder.CreateExtractElement(meshOutputCounts, static_cast(0)); vertexCount = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, vertexCount); // Promoted to SGPR vertexCount->setName("vertexCount"); - ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveCount)); - primitiveCount = readValueFromLds(m_builder.getInt32Ty(), ldsOffset); + primitiveCount = m_builder.CreateExtractElement(meshOutputCounts, 1); primitiveCount = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, primitiveCount); // Promoted to SGPR primitiveCount->setName("primitiveCount"); auto dummyAllocReq = m_builder.CreateICmpEQ(vertexCount, m_builder.getInt32(InvalidValue)); - m_builder.CreateCondBr(dummyAllocReq, checkDummyAllocReqBlock, checkExportPrimitiveBlock); + m_builder.CreateCondBr(dummyAllocReq, checkDummyAllocReqBlock, checkNoExportBlock); } // Construct ".checkDummyAllocReq" block @@ -1029,20 +1201,30 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { m_builder.CreateRetVoid(); } - // 
Construct ".checkExportPrimitive" block + // Construct ".checkNoExport" block { - m_builder.SetInsertPoint(checkExportPrimitiveBlock); + m_builder.SetInsertPoint(checkNoExportBlock); - m_builder.CreateBr(exportPrimitiveHeaderBlock); + // NOTE: When vertex count is 0, primitive count is 0 as well according to the processing of SetMeshOutputs. + // In such case, we can skip primitive/vertex export and do early return. + auto noExport = m_builder.CreateICmpEQ(vertexCount, m_builder.getInt32(0)); + m_builder.CreateCondBr(noExport, skipExportBlock, exportPrimitiveHeaderBlock); + } + + // Construct ".skipExport" block + { + m_builder.SetInsertPoint(skipExportBlock); + + m_builder.CreateRetVoid(); } // Construct ".exportPrimitiveHeader" block { m_builder.SetInsertPoint(exportPrimitiveHeaderBlock); - if (m_pipelineState->enableMeshRowExport()) { + if (rowExport) { loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); - loopIndexPhi->addIncoming(m_builder.getInt32(0), checkExportPrimitiveBlock); // loopIndex = 0 + loopIndexPhi->addIncoming(m_builder.getInt32(0), checkNoExportBlock); // loopIndex = 0 // primitiveIndex = threadIdInSubgroup + loopIndex * waveSize m_waveThreadInfo.primOrVertexIndex = @@ -1064,7 +1246,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { { m_builder.SetInsertPoint(exportPrimitiveBodyBlock); - if (m_pipelineState->enableMeshRowExport()) { + if (rowExport) { // // Row export is something like this: // @@ -1074,8 +1256,6 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // // while (primitiveIndex < primitiveCount) { // Export primitive - // Export primitive attributes - // // loopIndex += numWaves // primitiveIndex += loopIndex * waveSize // rowInSubgroup += loopIndex @@ -1086,23 +1266,24 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { } exportPrimitive(); - m_builder.CreateBr(m_pipelineState->enableMeshRowExport() ? 
exportPrimitiveHeaderBlock : endExportPrimitiveBlock); + m_builder.CreateBr(rowExport ? exportPrimitiveHeaderBlock : endExportPrimitiveBlock); } // Construct ".endExportPrimitive" block { m_builder.SetInsertPoint(endExportPrimitiveBlock); - m_builder.CreateBr(exportVertexHeaderBlock); + m_builder.CreateBr(waAtmPrecedesPos ? exportVertexAttributeHeaderBlock : exportPositionHeaderBlock); } - // Construct ".exportVertexHeader" block + // Construct ".exportPositionHeader" block { - m_builder.SetInsertPoint(exportVertexHeaderBlock); + m_builder.SetInsertPoint(exportPositionHeaderBlock); - if (m_pipelineState->enableMeshRowExport()) { + if (rowExport) { loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); - loopIndexPhi->addIncoming(m_builder.getInt32(0), endExportPrimitiveBlock); // loopIndex = 0 + loopIndexPhi->addIncoming(m_builder.getInt32(0), waAtmPrecedesPos ? endExportPrimitiveAttributeBlock + : endExportPrimitiveBlock); // loopIndex = 0 // vertexIndex = threadIdInSubgroup + loopIndex * waveSize m_waveThreadInfo.primOrVertexIndex = @@ -1117,14 +1298,14 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { } auto validVertex = m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, vertexCount); - m_builder.CreateCondBr(validVertex, exportVertexBodyBlock, endExportVertexBlock); + m_builder.CreateCondBr(validVertex, exportPositionBodyBlock, endExportPositionBlock); } - // Construct "exportVertexBody" block + // Construct "exportPositionBody" block { - m_builder.SetInsertPoint(exportVertexBodyBlock); + m_builder.SetInsertPoint(exportPositionBodyBlock); - if (m_pipelineState->enableMeshRowExport()) { + if (rowExport) { // // Row export is something like this: // @@ -1133,28 +1314,177 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // rowInSubgroup = waveIdInSubgroup // // while (vertexIndex < vertexCount) { - // Export vertex position data - // Export vertex attributes - // + // Export positions // loopIndex += numWaves // 
vertexIndex += loopIndex * waveSize // rowInSubgroup += loopIndex // } // auto loopIndex = m_builder.CreateAdd(loopIndexPhi, m_builder.getInt32(numWaves)); // loopIndex += numWaves - loopIndexPhi->addIncoming(loopIndex, exportVertexBodyBlock); + loopIndexPhi->addIncoming(loopIndex, exportPositionBodyBlock); } - exportVertex(); - m_builder.CreateBr(m_pipelineState->enableMeshRowExport() ? exportVertexHeaderBlock : endExportVertexBlock); + exportPositions(); + m_builder.CreateBr(rowExport ? exportPositionHeaderBlock : endExportPositionBlock); } - // Construct ".endExportVertex" block + // Construct ".endExportPosition" block { - m_builder.SetInsertPoint(endExportVertexBlock); + m_builder.SetInsertPoint(endExportPositionBlock); - auto firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); - m_builder.CreateCondBr(firstThreadInSubgroup, collectMeshStatsBlock, exitBlock); + if (waAtmPrecedesPos) { + auto firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); + m_builder.CreateCondBr(firstThreadInSubgroup, collectMeshStatsBlock, exitBlock); + } else { + m_builder.CreateBr(exportVertexAttributeHeaderBlock); + } + } + + // Construct ".exportVertexAttributeHeader" block + { + m_builder.SetInsertPoint(exportVertexAttributeHeaderBlock); + + if (numVertexAttributes > 0) { + if (rowExport) { + loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); + loopIndexPhi->addIncoming(m_builder.getInt32(0), + waAtmPrecedesPos ? 
endExportPrimitiveBlock : endExportPositionBlock); // loopIndex = 0 + + // vertexIndex = threadIdInSubgroup + loopIndex * waveSize + m_waveThreadInfo.primOrVertexIndex = + m_builder.CreateAdd(m_waveThreadInfo.threadIdInSubgroup, + m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "vertexIndex"); + + if (m_gfxIp.major >= 11) { + // rowInSubgroup = waveIdInSubgroup + loopIndex + m_waveThreadInfo.rowInSubgroup = + m_builder.CreateAdd(m_waveThreadInfo.waveIdInSubgroup, loopIndexPhi, "rowInSubgroup"); + } + } + + auto validVertex = m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, vertexCount); + m_builder.CreateCondBr(validVertex, exportVertexAttributeBodyBlock, endExportVertexAttributeBlock); + } else { + // No vertex attributes to export + m_builder.CreateBr(endExportVertexAttributeBlock); + } + } + + // Construct "exportVertexAttributeBody" block + { + m_builder.SetInsertPoint(exportVertexAttributeBodyBlock); + + if (numVertexAttributes > 0) { + if (rowExport) { + // + // Row export is something like this: + // + // loopIndex = 0 + // vertexIndex = threadIdInSubgroup + // rowInSubgroup = waveIdInSubgroup + // + // while (vertexIndex < vertexCount) { + // Export vertex attributes + // loopIndex += numWaves + // vertexIndex += loopIndex * waveSize + // rowInSubgroup += loopIndex + // } + // + auto loopIndex = m_builder.CreateAdd(loopIndexPhi, m_builder.getInt32(numWaves)); // loopIndex += numWaves + loopIndexPhi->addIncoming(loopIndex, exportVertexAttributeBodyBlock); + } + + exportVertexAttributes(); + m_builder.CreateBr(rowExport ? 
exportVertexAttributeHeaderBlock : endExportVertexAttributeBlock); + } else { + // No vertex attributes to export + m_builder.CreateUnreachable(); + } + } + + // Construct ".endExportVertexAttribute" block + { + m_builder.SetInsertPoint(endExportVertexAttributeBlock); + + m_builder.CreateBr(exportPrimitiveAttributeHeaderBlock); + } + + // Construct ".exportPrimitiveAttributeHeader" block + { + m_builder.SetInsertPoint(exportPrimitiveAttributeHeaderBlock); + + if (numPrimitiveAttributes > 0) { + if (rowExport) { + loopIndexPhi = m_builder.CreatePHI(m_builder.getInt32Ty(), 2); + loopIndexPhi->addIncoming(m_builder.getInt32(0), endExportVertexAttributeBlock); // loopIndex = 0 + + // primitiveIndex = threadIdInSubgroup + loopIndex * waveSize + m_waveThreadInfo.primOrVertexIndex = + m_builder.CreateAdd(m_waveThreadInfo.threadIdInSubgroup, + m_builder.CreateMul(loopIndexPhi, m_builder.getInt32(waveSize)), "primitiveIndex"); + + if (m_gfxIp.major >= 11) { + // rowInSubgroup = waveIdInSubgroup + loopIndex + m_waveThreadInfo.rowInSubgroup = + m_builder.CreateAdd(m_waveThreadInfo.waveIdInSubgroup, loopIndexPhi, "rowInSubgroup"); + } + } + + auto validPrimitive = m_builder.CreateICmpULT(m_waveThreadInfo.primOrVertexIndex, primitiveCount); + m_builder.CreateCondBr(validPrimitive, exportPrimitiveAttributeBodyBlock, endExportPrimitiveAttributeBlock); + } else { + // No primitive attributes to export + m_builder.CreateBr(endExportPrimitiveAttributeBlock); + } + } + + // Construct "exportPrimitiveAttributeBody" block + { + m_builder.SetInsertPoint(exportPrimitiveAttributeBodyBlock); + + if (numPrimitiveAttributes > 0) { + if (rowExport) { + // + // Row export is something like this: + // + // loopIndex = 0 + // primitiveIndex = threadIdInSubgroup + // rowInSubgroup = waveIdInSubgroup + // + // while (primitiveIndex < primitiveCount) { + // Export primitive attributes + // loopIndex += numWaves + // primitiveIndex += loopIndex * waveSize + // rowInSubgroup += loopIndex + // } + // 
+ auto loopIndex = m_builder.CreateAdd(loopIndexPhi, m_builder.getInt32(numWaves)); // loopIndex += numWaves + loopIndexPhi->addIncoming(loopIndex, exportPrimitiveAttributeBodyBlock); + } + + exportPrimitiveAttributes(); + m_builder.CreateBr(rowExport ? exportPrimitiveAttributeHeaderBlock : endExportPrimitiveAttributeBlock); + } else { + // No primitive attributes to export + m_builder.CreateUnreachable(); + } + } + + // Construct ".endExportPrimitiveAttribute" block + { + m_builder.SetInsertPoint(endExportPrimitiveAttributeBlock); + + if (waAtmPrecedesPos) { + if (numVertexAttributes > 0 || numPrimitiveAttributes > 0) { + // Before the first position export, add s_wait_vscnt 0 to make sure the completion of all + // attributes being written to the attribute ring buffer + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); + } + m_builder.CreateBr(exportPositionHeaderBlock); + } else { + auto firstThreadInSubgroup = m_builder.CreateICmpEQ(m_waveThreadInfo.threadIdInSubgroup, m_builder.getInt32(0)); + m_builder.CreateCondBr(firstThreadInSubgroup, collectMeshStatsBlock, exitBlock); + } } // Construct ".collectMeshStats" block @@ -1419,18 +1749,22 @@ void MeshTaskShader::lowerSetMeshOutputs(SetMeshOutputsOp &setMeshOutputsOp) { { m_builder.SetInsertPoint(setMeshOutputsBlock->getTerminator()); - // Promote vertex/primitive count to SGPRs - vertexCount = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, vertexCount); - primitiveCount = m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, primitiveCount); - // Check if vertex count or primitive count is zero. If so, set both to zero in order to disable vertex/primitive // exporting. 
- auto zeroVertexCount = m_builder.CreateICmpEQ(vertexCount, m_builder.getInt32(0)); - auto zeroPrimitiveCount = m_builder.CreateICmpEQ(primitiveCount, m_builder.getInt32(0)); - auto hasZeroCount = m_builder.CreateOr(zeroVertexCount, zeroPrimitiveCount); + auto productOfCounts = m_builder.CreateMul(vertexCount, primitiveCount); + productOfCounts = + m_builder.CreateIntrinsic(m_builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, productOfCounts); + auto hasZeroCount = m_builder.CreateICmpEQ(productOfCounts, m_builder.getInt32(0)); vertexCount = m_builder.CreateSelect(hasZeroCount, m_builder.getInt32(0), vertexCount); primitiveCount = m_builder.CreateSelect(hasZeroCount, m_builder.getInt32(0), primitiveCount); + Value *ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::MeshOutputCounts)); + Value *meshOutputCounts = PoisonValue::get(FixedVectorType::get(m_builder.getInt32Ty(), 2)); + meshOutputCounts = m_builder.CreateInsertElement(meshOutputCounts, vertexCount, static_cast(0)); + meshOutputCounts = m_builder.CreateInsertElement(meshOutputCounts, primitiveCount, 1); + meshOutputCounts = m_builder.CreateBitCast(meshOutputCounts, m_builder.getInt64Ty(), "meshOutputCounts"); + writeValueToLds(meshOutputCounts, ldsOffset, sizeof(uint64_t)); + // NOTE: Here, we promote vertex/primitive count to SGPRs once again because M0 implicitly used in s_sendmsg is // SGPR. 
LLVM backend has issues of handling this because it doesn't use s_cselect to translate LLVM IR select // instruction (which keeps the destination operand still in SGPR) and it doesn't use readfirstlane to promote @@ -1442,12 +1776,6 @@ void MeshTaskShader::lowerSetMeshOutputs(SetMeshOutputsOp &setMeshOutputsOp) { Value *m0 = m_builder.CreateShl(primitiveCount, 12); m0 = m_builder.CreateOr(m0, vertexCount); m_builder.CreateIntrinsic(Intrinsic::amdgcn_s_sendmsg, {}, {m_builder.getInt32(GsAllocReq), m0}); - - Value *ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexCount)); - writeValueToLds(vertexCount, ldsOffset); - - ldsOffset = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveCount)); - writeValueToLds(primitiveCount, ldsOffset); } // Construct ".endSetMeshOutputs" block @@ -1513,14 +1841,31 @@ void MeshTaskShader::lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &set } } - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); - Value *ldsOffset = m_builder.CreateAdd(ldsStart, primitiveIndex); - // NOTE: We first clear old primitive connectivity data and use atomic OR operation to set new data. This is because // the null primitive flag might be set via built-in CullPrimitive. 
static const unsigned ClearMask = (1u << 31); - atomicOpWithLds(AtomicRMWInst::And, m_builder.getInt32(ClearMask), ldsOffset); - atomicOpWithLds(AtomicRMWInst::Or, primitiveData, ldsOffset); + + if (m_outputsLayout.outputsToAllocas) { + if (!m_outputsLayout.primitiveDataAlloca) { + // Create alloca if not existing + IRBuilder<>::InsertPointGuard guard(m_builder); + m_builder.SetInsertPointPastAllocas(setMeshPrimitiveIndicesOp.getFunction()); + m_outputsLayout.primitiveDataAlloca = m_builder.CreateAlloca(m_builder.getInt32Ty(), nullptr, "primitiveData"); + m_outputsLayout.primitiveDataAlloca->setAlignment(Align(4)); + } + auto primitiveDataAlloca = m_outputsLayout.primitiveDataAlloca; + + Value *newPrimitiveData = m_builder.CreateLoad(m_builder.getInt32Ty(), primitiveDataAlloca); + newPrimitiveData = m_builder.CreateAnd(newPrimitiveData, ClearMask); + newPrimitiveData = m_builder.CreateOr(newPrimitiveData, primitiveData); + m_builder.CreateAlignedStore(newPrimitiveData, primitiveDataAlloca, Align(4)); + } else { + Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); + Value *ldsOffset = m_builder.CreateAdd(ldsStart, primitiveIndex); + + atomicOpWithLds(AtomicRMWInst::And, m_builder.getInt32(ClearMask), ldsOffset); + atomicOpWithLds(AtomicRMWInst::Or, primitiveData, ldsOffset); + } m_callsToRemove.push_back(&setMeshPrimitiveIndicesOp); } @@ -1619,7 +1964,7 @@ void MeshTaskShader::lowerGetMeshBuiltinInput(GetMeshBuiltinInputOp &getMeshBuil // ===================================================================================================================== // Lower set mesh primitive culled state. Set primitive culled state by writing the null primitive flag to LDS. 
// -// @param setMeshPrimitiveIndicesOp : Call instruction op to set primitive indices for mesh shader +// @param setMeshPrimitiveCulledOp : Call instruction op to set primitive culled state void MeshTaskShader::lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMeshPrimitiveCulledOp) { m_builder.SetInsertPoint(&setMeshPrimitiveCulledOp); @@ -1640,14 +1985,31 @@ void MeshTaskShader::lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMe static const unsigned NullPrimitive = (1u << 31); auto nullPrimitive = m_builder.CreateSelect(isCulled, m_builder.getInt32(NullPrimitive), m_builder.getInt32(0)); - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); - Value *ldsOffset = m_builder.CreateAdd(ldsStart, primitiveIndex); - // NOTE: We first clear null primitive flag and use atomic OR operation to set new flag. This is because the // primitive connectivity data might be set via built-in PrimitiveXXXIndices. static const unsigned ClearMask = ~(1u << 31); - atomicOpWithLds(AtomicRMWInst::And, m_builder.getInt32(ClearMask), ldsOffset); - atomicOpWithLds(AtomicRMWInst::Or, nullPrimitive, ldsOffset); + + if (m_outputsLayout.outputsToAllocas) { + if (!m_outputsLayout.primitiveDataAlloca) { + // Create alloca if not existing + IRBuilder<>::InsertPointGuard guard(m_builder); + m_builder.SetInsertPointPastAllocas(setMeshPrimitiveCulledOp.getFunction()); + m_outputsLayout.primitiveDataAlloca = m_builder.CreateAlloca(m_builder.getInt32Ty(), nullptr, "primitiveData"); + m_outputsLayout.primitiveDataAlloca->setAlignment(Align(4)); + } + auto primitiveDataAlloca = m_outputsLayout.primitiveDataAlloca; + + Value *newPrimitiveData = m_builder.CreateLoad(m_builder.getInt32Ty(), primitiveDataAlloca); + newPrimitiveData = m_builder.CreateAnd(newPrimitiveData, ClearMask); + newPrimitiveData = m_builder.CreateOr(newPrimitiveData, nullPrimitive); + m_builder.CreateAlignedStore(newPrimitiveData, primitiveDataAlloca, Align(4)); + } else 
{ + Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); + Value *ldsOffset = m_builder.CreateAdd(ldsStart, primitiveIndex); + + atomicOpWithLds(AtomicRMWInst::And, m_builder.getInt32(ClearMask), ldsOffset); + atomicOpWithLds(AtomicRMWInst::Or, nullPrimitive, ldsOffset); + } m_callsToRemove.push_back(&setMeshPrimitiveCulledOp); } @@ -1672,39 +2034,98 @@ void MeshTaskShader::lowerWriteMeshOutput(WriteMeshOutputOp &writeMeshOutputOp) isPrimitive ? m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents : m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + assert(outputComponents.count(location) > 0); // Must exist + const unsigned numComponents = outputComponents[location].first; - // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + - // offsetInPrimOrVertex + locationIndex * numComponents + componentIndex - Value *ldsStart = m_builder.getInt32( - getMeshShaderLdsRegionStart(isPrimitive ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); - const unsigned primOrVertexStride = isPrimitive ? m_outputsLayout.primitiveStride : m_outputsLayout.vertexStride; - Value *primOrVertexOffset = m_builder.CreateMul(primOrVertexIndex, m_builder.getInt32(primOrVertexStride)); + if (m_outputsLayout.outputsToAllocas) { + assert(locationOffset == m_builder.getInt32(0)); // Must not be output array indexing - Value *offsetInPrimOrVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, isPrimitive)); - if (locationOffset != m_builder.getInt32(0)) { - auto locationIndex = locationOffset; + auto outputAllocaTy = FixedVectorType::get(m_builder.getFloatTy(), numComponents); - assert(outputComponents.count(location) > 0); // Must exist - unsigned numComponents = outputComponents[location].first; + auto &outputAllocas = isPrimitive ? 
m_outputsLayout.primitiveOutputAllocas : m_outputsLayout.vertexOutputAllocas; + if (outputAllocas.count(location) == 0) { + // Create alloca if not existing + IRBuilder<>::InsertPointGuard guard(m_builder); + m_builder.SetInsertPointPastAllocas(writeMeshOutputOp.getFunction()); - if (numComponents > 4) { - // NOTE: Here we encounter 64-bit vec3/vec4 data types. Such types will occupy two consecutive locations and the - // provided location offset must be divided by 2 to get real location index. - locationIndex = m_builder.CreateLShr(locationOffset, 2); + outputAllocas[location] = m_builder.CreateAlloca( + outputAllocaTy, nullptr, (isPrimitive ? "primitiveOutput" : "vertexOutput") + std::to_string(location)); + outputAllocas[location]->setAlignment(Align(4)); } - offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, - m_builder.CreateMul(locationIndex, m_builder.getInt32(numComponents))); - } + auto outputAlloca = getOutputAlloca(location, isPrimitive); - if (componentIndex != m_builder.getInt32(0)) - offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, componentIndex); + const unsigned bitWidth = outputValue->getType()->getScalarSizeInBits(); + unsigned numElements = outputValue->getType()->getPrimitiveSizeInBits() / bitWidth; - auto ldsOffset = ldsStart; - ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); - ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); + // Bitcast the output to 32-bit value + if (bitWidth == 32 || bitWidth == 64) { + numElements *= (bitWidth / 32); + outputValue = m_builder.CreateBitCast(outputValue, FixedVectorType::get(m_builder.getFloatTy(), numElements)); + } else if (bitWidth == 8 || bitWidth == 16) { + if (outputValue->getType()->isFPOrFPVectorTy()) { + outputValue = + m_builder.CreateBitCast(outputValue, FixedVectorType::get(m_builder.getIntNTy(bitWidth), numElements)); + } + outputValue = m_builder.CreateZExt(outputValue, FixedVectorType::get(m_builder.getInt32Ty(), numElements)); +
outputValue = m_builder.CreateBitCast(outputValue, FixedVectorType::get(m_builder.getFloatTy(), numElements)); + } + assert(outputValue->getType()->getScalarSizeInBits() == 32); // Must be 32-bit now - writeValueToLds(outputValue, ldsOffset); + if (outputAllocaTy == outputValue->getType()) { + // Store the whole output + assert(componentIndex == m_builder.getInt32(0)); + m_builder.CreateAlignedStore(outputValue, outputAlloca, Align(4)); + } else { + // Store part of the output + Value *newOutputValue = m_builder.CreateAlignedLoad(outputAllocaTy, outputAlloca, Align(4)); + + // Scalarize output value + SmallVector outputValues; + for (unsigned i = 0; i < numElements; ++i) + outputValues.push_back(m_builder.CreateExtractElement(outputValue, i)); + + // Insert output elements + for (unsigned i = 0; i < outputValues.size(); ++i) { + Value *insertIndex = componentIndex == m_builder.getInt32(0) + ? m_builder.getInt32(i) + : m_builder.CreateAdd(componentIndex, m_builder.getInt32(i)); + newOutputValue = m_builder.CreateInsertElement(newOutputValue, outputValues[i], insertIndex); + } + + m_builder.CreateAlignedStore(newOutputValue, outputAlloca, Align(4)); + } + } else { + // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + + // offsetInPrimOrVertex + locationIndex * numComponents + componentIndex + Value *ldsStart = m_builder.getInt32( + getMeshShaderLdsRegionStart(isPrimitive ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + const unsigned primOrVertexStride = isPrimitive ? m_outputsLayout.primitiveStride : m_outputsLayout.vertexStride; + Value *primOrVertexOffset = m_builder.CreateMul(primOrVertexIndex, m_builder.getInt32(primOrVertexStride)); + + Value *offsetInPrimOrVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, isPrimitive)); + if (locationOffset != m_builder.getInt32(0)) { + auto locationIndex = locationOffset; + + if (numComponents > 4) { + // NOTE: Here we encounter 64-bit vec3/vec4 data types. 
Such types will occupy two consecutive locations and the + // provided location offset must be divided by 2 to get real location index. + locationIndex = m_builder.CreateLShr(locationOffset, 2); + } + + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, + m_builder.CreateMul(locationIndex, m_builder.getInt32(numComponents))); + } + + if (componentIndex != m_builder.getInt32(0)) + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, componentIndex); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); + + writeValueToLds(outputValue, ldsOffset); + } m_callsToRemove.push_back(&writeMeshOutputOp); } @@ -2032,7 +2453,7 @@ void MeshTaskShader::lowerMeshShaderBody(BasicBlock *apiMeshEntryBlock, BasicBlo } // ===================================================================================================================== -// Export primitive (primitive connectivity data, primitive payload, and primitive attributes). +// Export primitive (primitive connectivity data and primitive payload). 
void MeshTaskShader::exportPrimitive() { const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; @@ -2040,7 +2461,20 @@ void MeshTaskShader::exportPrimitive() { Value *ldsOffset = m_builder.CreateAdd(ldsStart, m_waveThreadInfo.primOrVertexIndex); // The first dword is primitive connectivity data - auto primitiveIndices = readValueFromLds(m_builder.getInt32Ty(), ldsOffset); + Value *primitiveIndices = nullptr; + if (m_outputsLayout.outputsToAllocas) { + if (m_outputsLayout.primitiveDataAlloca) { + primitiveIndices = + m_builder.CreateAlignedLoad(m_builder.getInt32Ty(), m_outputsLayout.primitiveDataAlloca, Align(4)); + } else { + // No primitive indices have been written + primitiveIndices = PoisonValue::get(m_builder.getInt32Ty()); + } + } else { + primitiveIndices = readValueFromLds(m_builder.getInt32Ty(), ldsOffset); + } + assert(primitiveIndices); + primitiveIndices->setName("primitiveIndices"); // The second dword is primitive payload, which has the following bit layout specified by HW: // @@ -2056,9 +2490,8 @@ void MeshTaskShader::exportPrimitive() { // | [31:28] | [27:24] | [23:20] | [19:13] | [12:0] | // +---------------+---------+----------------+---------+----------------+ Value *primitivePayload = nullptr; - Value *primitiveId = nullptr; if (builtInUsage.primitiveId) { - primitiveId = readMeshBuiltInFromLds(BuiltInPrimitiveId); + Value *primitiveId = readBackMeshBuiltInOutput(BuiltInPrimitiveId); if (m_gfxIp.major < 11) { // [16:0] = Pipeline primitive ID auto primitiveIdMaskAndShift = m_builder.CreateAnd(primitiveId, 0x1FFFF); @@ -2071,14 +2504,11 @@ void MeshTaskShader::exportPrimitive() { Value *layer = nullptr; if (builtInUsage.layer) - layer = readMeshBuiltInFromLds(BuiltInLayer); + layer = readBackMeshBuiltInOutput(BuiltInLayer); Value *viewportIndex = nullptr; if (builtInUsage.viewportIndex) - viewportIndex = readMeshBuiltInFromLds(BuiltInViewportIndex); - - Value *fsLayer = layer; - Value 
*fsViewportIndex = viewportIndex; + viewportIndex = readBackMeshBuiltInOutput(BuiltInViewportIndex); const bool enableMultiView = m_pipelineState->getInputAssemblyState().multiView != MultiViewMode::Disable; if (enableMultiView) { @@ -2132,7 +2562,7 @@ void MeshTaskShader::exportPrimitive() { if (builtInUsage.primitiveShadingRate) { // [31:28] = VRS rate - auto primitiveShadingRate = readMeshBuiltInFromLds(BuiltInPrimitiveShadingRate); + auto primitiveShadingRate = readBackMeshBuiltInOutput(BuiltInPrimitiveShadingRate); auto hwShadingRateMaskAndShift = convertToHwShadingRate(primitiveShadingRate); hwShadingRateMaskAndShift = m_builder.CreateAnd(hwShadingRateMaskAndShift, 0xF); @@ -2144,16 +2574,87 @@ void MeshTaskShader::exportPrimitive() { primitivePayload = hwShadingRateMaskAndShift; } - doExport(ExportKind::Prim, ExportInfo{0, {primitiveIndices, primitivePayload}}); + if (primitivePayload) + primitivePayload->setName("primitivePayload"); - // Primitive attribute export follows vertex attribute export - SmallVector primAttrExports; + doExport(ExportKind::Primitive, ExportInfo{0, {primitiveIndices, primitivePayload}}); +} - // Export primitive attributes (from generic outputs) - ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveOutput)); - auto primitiveOffset = - m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); +// ===================================================================================================================== +// Export vertex positions. 
+void MeshTaskShader::exportPositions() { + const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; + + SmallVector positionExports; + + if (builtInUsage.position) { + auto position = readBackMeshBuiltInOutput(BuiltInPosition); + std::array positions = { + m_builder.CreateExtractElement(position, static_cast(0)), m_builder.CreateExtractElement(position, 1), + m_builder.CreateExtractElement(position, 2), m_builder.CreateExtractElement(position, 3)}; + positionExports.push_back({0, positions}); + } + + if (builtInUsage.pointSize) { + auto pointSize = readBackMeshBuiltInOutput(BuiltInPointSize); + positionExports.push_back({1, pointSize}); + } + + SmallVector clipDistances; + if (builtInUsage.clipDistance > 0) { + auto clipDistance = readBackMeshBuiltInOutput(BuiltInClipDistance); + for (unsigned i = 0; i < builtInUsage.clipDistance; ++i) + clipDistances.push_back(m_builder.CreateExtractElement(clipDistance, i)); + } + SmallVector cullDistances; + if (builtInUsage.cullDistance > 0) { + auto cullDistance = readBackMeshBuiltInOutput(BuiltInCullDistance); + for (unsigned i = 0; i < builtInUsage.cullDistance; ++i) + cullDistances.push_back(m_builder.CreateExtractElement(cullDistance, i)); + } + + SmallVector clipCullDistances; + if (builtInUsage.clipDistance > 0 || builtInUsage.cullDistance > 0) { + assert(builtInUsage.clipDistance + builtInUsage.cullDistance <= MaxClipCullDistanceCount); + + // Merge clipDistance and cullDistance + for (auto clipDistance : clipDistances) + clipCullDistances.push_back(clipDistance); + + for (auto cullDistance : cullDistances) + clipCullDistances.push_back(cullDistance); + + // Do array padding + auto poison = PoisonValue::get(m_builder.getFloatTy()); + if (clipCullDistances.size() <= 4) { + while (clipCullDistances.size() < 4) // <4 x float> + clipCullDistances.push_back(poison); + } else { + while (clipCullDistances.size() < 8) // <8 x float> + clipCullDistances.push_back(poison); + } + + 
unsigned exportSlot = builtInUsage.pointSize ? 2 : 1; + positionExports.push_back( + {exportSlot, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); + + if (clipCullDistances.size() > 4) { + // Do the second exporting + positionExports.push_back( + {exportSlot + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); + } + } + + doExport(ExportKind::Position, positionExports); +} + +// ===================================================================================================================== +// Export primitive attributes +void MeshTaskShader::exportPrimitiveAttributes() { + SmallVector attributeExports; + + // Export primitive attributes (from generic outputs) auto &primitiveOutputComponents = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents; for (auto &primitiveOutput : primitiveOutputComponents) { @@ -2164,13 +2665,7 @@ void MeshTaskShader::exportPrimitive() { if (forBuiltIn != InvalidValue) continue; // Skip those special outputs mapped from primitive built-ins. They will be handled later on. 
- auto offsetInPrimitive = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, true)); - - auto ldsOffset = ldsStart; - ldsOffset = m_builder.CreateAdd(ldsOffset, primitiveOffset); - ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimitive); - - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); + auto exportValue = readBackMeshGenericOutput(location, true); SmallVector exporteValues; for (unsigned i = 0; i < numComponents; ++i) @@ -2187,23 +2682,27 @@ void MeshTaskShader::exportPrimitive() { unsigned exportSlot = getOutputExportSlot(location, true); assert(exportSlot != InvalidValue); - primAttrExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); + attributeExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); if (numComponents > 4) - primAttrExports.push_back( + attributeExports.push_back( {exportSlot + 1, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); } // Export primitive attributes (from built-ins as generic ones) + const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; + if (builtInUsage.primitiveId) { const unsigned exportSlot = getOutputExportSlot(BuiltInPrimitiveId, true); if (exportSlot != InvalidValue) { - assert(primitiveId); - primAttrExports.push_back({exportSlot, primitiveId}); + Value *primitiveId = readBackMeshBuiltInOutput(BuiltInPrimitiveId); + attributeExports.push_back({exportSlot, primitiveId}); } } + Value *layer = nullptr; bool exportLayer = false; if (builtInUsage.layer) { + layer = readBackMeshBuiltInOutput(BuiltInLayer); exportLayer = true; } else { const auto nextStage = m_pipelineState->getNextShaderStage(ShaderStage::Mesh); @@ -2212,7 +2711,7 @@ void MeshTaskShader::exportPrimitive() { if (fsBuiltInUsage.layer) { // NOTE: In such case, mesh shader doesn't export layer while fragment shader 
expects to read it. We // export 0 to fragment shader, which is required by the spec. - fsLayer = m_builder.getInt32(0); + layer = m_builder.getInt32(0); exportLayer = true; } } @@ -2221,13 +2720,15 @@ void MeshTaskShader::exportPrimitive() { if (exportLayer) { const unsigned exportSlot = getOutputExportSlot(BuiltInLayer, true); if (exportSlot != InvalidValue) { - assert(fsLayer); - primAttrExports.push_back({exportSlot, fsLayer}); + assert(layer); + attributeExports.push_back({exportSlot, layer}); } } + Value *viewportIndex = nullptr; bool exportViewportIndex = false; if (builtInUsage.viewportIndex) { + viewportIndex = readBackMeshBuiltInOutput(BuiltInViewportIndex); exportViewportIndex = true; } else { const auto nextStage = m_pipelineState->getNextShaderStage(ShaderStage::Mesh); @@ -2236,7 +2737,7 @@ void MeshTaskShader::exportPrimitive() { if (fsBuiltInUsage.viewportIndex) { // NOTE: In such case, mesh shader doesn't export viewport index while fragment shader expects to read it. We // export 0 to fragment shader, which is required by spec. - fsViewportIndex = m_builder.getInt32(0); + viewportIndex = m_builder.getInt32(0); exportViewportIndex = true; } } @@ -2245,96 +2746,20 @@ void MeshTaskShader::exportPrimitive() { if (exportViewportIndex) { const unsigned exportSlot = getOutputExportSlot(BuiltInViewportIndex, true); if (exportSlot != InvalidValue) { - assert(fsViewportIndex); - primAttrExports.push_back({exportSlot, fsViewportIndex}); + assert(viewportIndex); + attributeExports.push_back({exportSlot, viewportIndex}); } } - doExport(ExportKind::PrimAttr, primAttrExports); + doExport(ExportKind::PrimitiveAttribute, attributeExports); } // ===================================================================================================================== -// Export vertex (vertex position data and vertex attributes). 
-void MeshTaskShader::exportVertex() { - const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; - - // Export vertex position data - SmallVector posExports; - - if (builtInUsage.position) { - auto position = readMeshBuiltInFromLds(BuiltInPosition); - std::array positions = { - m_builder.CreateExtractElement(position, static_cast(0)), m_builder.CreateExtractElement(position, 1), - m_builder.CreateExtractElement(position, 2), m_builder.CreateExtractElement(position, 3)}; - - posExports.push_back({0, positions}); - } - - if (builtInUsage.pointSize) { - auto pointSize = readMeshBuiltInFromLds(BuiltInPointSize); - posExports.push_back({1, pointSize}); - } - - SmallVector clipDistances; - if (builtInUsage.clipDistance > 0) { - auto clipDistance = readMeshBuiltInFromLds(BuiltInClipDistance); - for (unsigned i = 0; i < builtInUsage.clipDistance; ++i) - clipDistances.push_back(m_builder.CreateExtractElement(clipDistance, i)); - } - - SmallVector cullDistances; - if (builtInUsage.cullDistance > 0) { - auto cullDistance = readMeshBuiltInFromLds(BuiltInCullDistance); - for (unsigned i = 0; i < builtInUsage.cullDistance; ++i) - cullDistances.push_back(m_builder.CreateExtractElement(cullDistance, i)); - } - - SmallVector clipCullDistances; - if (builtInUsage.clipDistance > 0 || builtInUsage.cullDistance > 0) { - assert(builtInUsage.clipDistance + builtInUsage.cullDistance <= MaxClipCullDistanceCount); - - // Merge clipDistance and cullDistance - for (auto clipDistance : clipDistances) - clipCullDistances.push_back(clipDistance); - - for (auto cullDistance : cullDistances) - clipCullDistances.push_back(cullDistance); - - // Do array padding - auto poison = PoisonValue::get(m_builder.getFloatTy()); - if (clipCullDistances.size() <= 4) { - while (clipCullDistances.size() < 4) // <4 x float> - clipCullDistances.push_back(poison); - } else { - while (clipCullDistances.size() < 8) // <8 x float> - clipCullDistances.push_back(poison); 
- } - - unsigned pos = builtInUsage.pointSize ? 2 : 1; - posExports.push_back( - {pos, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); - - if (clipCullDistances.size() > 4) { - // Do the second exporting - posExports.push_back( - {pos + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); - } - } - - bool waAtmPrecedesPos = false; - if (m_gfxIp.major >= 11) - waAtmPrecedesPos = m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos; - - if (!waAtmPrecedesPos) - doExport(ExportKind::Pos, posExports); - - SmallVector vertAttrExports; +// Export vertex attributes +void MeshTaskShader::exportVertexAttributes() { + SmallVector attributeExports; // Export vertex attributes (from generic outputs) - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexOutput)); - auto vertexOffset = - m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); - auto &vertexOutputComponents = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; for (auto &vertexOutput : vertexOutputComponents) { @@ -2345,13 +2770,7 @@ void MeshTaskShader::exportVertex() { if (forBuiltIn != InvalidValue) continue; // Skip those special outputs mapped from vertex built-ins. They will be handled later on. 
- auto offsetInVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, false)); - - auto ldsOffset = ldsStart; - ldsOffset = m_builder.CreateAdd(ldsOffset, vertexOffset); - ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInVertex); - - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); + auto exportValue = readBackMeshGenericOutput(location, false); SmallVector exporteValues; for (unsigned i = 0; i < numComponents; ++i) @@ -2368,14 +2787,50 @@ void MeshTaskShader::exportVertex() { unsigned exportSlot = getOutputExportSlot(location, false); assert(exportSlot != InvalidValue); - vertAttrExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); + attributeExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); if (numComponents > 4) - vertAttrExports.push_back( + attributeExports.push_back( {exportSlot + 1, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); } // Export vertex attributes (from built-ins as generic ones) + const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; + if (builtInUsage.clipDistance > 0 || builtInUsage.cullDistance > 0) { + assert(builtInUsage.clipDistance + builtInUsage.cullDistance <= MaxClipCullDistanceCount); + + SmallVector clipDistances; + if (builtInUsage.clipDistance > 0) { + auto clipDistance = readBackMeshBuiltInOutput(BuiltInClipDistance); + for (unsigned i = 0; i < builtInUsage.clipDistance; ++i) + clipDistances.push_back(m_builder.CreateExtractElement(clipDistance, i)); + } + + SmallVector cullDistances; + if (builtInUsage.cullDistance > 0) { + auto cullDistance = readBackMeshBuiltInOutput(BuiltInCullDistance); + for (unsigned i = 0; i < builtInUsage.cullDistance; ++i) + cullDistances.push_back(m_builder.CreateExtractElement(cullDistance, i)); + } + + // Merge clipDistance and cullDistance + 
SmallVector clipCullDistances; + for (auto clipDistance : clipDistances) + clipCullDistances.push_back(clipDistance); + + for (auto cullDistance : cullDistances) + clipCullDistances.push_back(cullDistance); + + // Do array padding + auto poison = PoisonValue::get(m_builder.getFloatTy()); + if (clipCullDistances.size() <= 4) { + while (clipCullDistances.size() < 4) // <4 x float> + clipCullDistances.push_back(poison); + } else { + while (clipCullDistances.size() < 8) // <8 x float> + clipCullDistances.push_back(poison); + } + bool exportClipCullDistance = true; auto nextStage = m_pipelineState->getNextShaderStage(ShaderStage::Mesh); @@ -2419,25 +2874,18 @@ void MeshTaskShader::exportVertex() { } assert(exportSlot != InvalidValue); - vertAttrExports.push_back( + attributeExports.push_back( {exportSlot, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); if (clipCullDistances.size() > 4) { // Do the second exporting - vertAttrExports.push_back( + attributeExports.push_back( {exportSlot + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); } } } - doExport(ExportKind::VertAttr, vertAttrExports); - if (waAtmPrecedesPos) { - // Before the first export call of vertex position data, add s_wait_vscnt 0 to make sure the completion of all - // attributes being written to the attribute ring buffer - m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); - - doExport(ExportKind::Pos, posExports); - } + doExport(ExportKind::VertexAttribute, attributeExports); } // ===================================================================================================================== @@ -2527,14 +2975,14 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { unsigned target = InvalidValue; switch (kind) { - case ExportKind::Pos: + case ExportKind::Position: target = EXP_TARGET_POS_0; break; - case ExportKind::Prim: + case 
ExportKind::Primitive: target = EXP_TARGET_PRIM; break; - case ExportKind::VertAttr: - case ExportKind::PrimAttr: + case ExportKind::VertexAttribute: + case ExportKind::PrimitiveAttribute: target = EXP_TARGET_PARAM_0; break; default: @@ -2543,12 +2991,12 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { } bool exportDone = false; - if ((kind == ExportKind::Pos || kind == ExportKind::Prim) && i == exports.size() - 1) + if ((kind == ExportKind::Position || kind == ExportKind::Primitive) && i == exports.size() - 1) exportDone = true; // Last export if (m_gfxIp.major >= 11) { - if (m_pipelineState->exportAttributeByExportInstruction() || kind == ExportKind::Pos || - kind == ExportKind::Prim) { + if (m_pipelineState->exportAttributeByExportInstruction() || kind == ExportKind::Position || + kind == ExportKind::Primitive) { m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp_row, valueTy, { m_builder.getInt32(target + exports[i].slot), // tgt @@ -2561,7 +3009,7 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { m_waveThreadInfo.rowInSubgroup, // row number }); } else { - assert(kind == ExportKind::VertAttr || kind == ExportKind::PrimAttr); + assert(kind == ExportKind::VertexAttribute || kind == ExportKind::PrimitiveAttribute); assert(!m_pipelineState->exportAttributeByExportInstruction()); Value *valueToStore = PoisonValue::get(FixedVectorType::get(valueTy, 4)); @@ -2605,15 +3053,15 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { void MeshTaskShader::prepareAttribRingAccess() { assert(m_gfxIp.major >= 11); // Must be GFX11+ - unsigned attribCount = m_outputsLayout.vertexExportCount + m_outputsLayout.primitiveExportCount; - if (attribCount == 0) + unsigned numAttributes = m_outputsLayout.vertexExportCount + m_outputsLayout.primitiveExportCount; + if (numAttributes == 0) return; // No attribute export // NOTE: HW allocates and manages attribute ring based on the register fields: VS_EXPORT_COUNT and PRIM_EXPORT_COUNT. 
// When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even though this is not what we // want. Hence, we should reserve param0 as a dummy vertex attribute. if (m_outputsLayout.vertexExportCount == 0) - ++attribCount; // Count in this dummy vertex attribute + ++numAttributes; // Count in this dummy vertex attribute // attribRingBase[14:0] auto entryPoint = m_builder.GetInsertBlock()->getParent(); @@ -2626,12 +3074,12 @@ void MeshTaskShader::prepareAttribRingAccess() { m_attribRingBufDesc = m_pipelineSysValues.get(entryPoint)->getAttribRingBufDesc(); // Modify the field STRIDE of attribute ring buffer descriptor - if (attribCount >= 2) { + if (numAttributes >= 2) { // STRIDE = WORD1[30:16], STRIDE is initialized to 16 by the driver, which is the right value for attribCount == 1. // We override the value if there are more attributes. auto descWord1 = m_builder.CreateExtractElement(m_attribRingBufDesc, 1); - auto stride = m_builder.getInt32(attribCount * SizeOfVec4); - if ((attribCount & 1) == 0) { + auto stride = m_builder.getInt32(numAttributes * SizeOfVec4); + if ((numAttributes & 1) == 0) { // Clear the bit that was set in STRIDE by the driver. descWord1 = m_builder.CreateAnd(descWord1, ~0x3FFF0000); } @@ -2831,21 +3279,22 @@ Value *MeshTaskShader::getMeshGlobalInvocationId() { } // ===================================================================================================================== -// Read mesh shader built-in value from LDS, which is supposed to be written by mesh shader execution. +// Read back mesh shader built-in output value from output allocas or LDS, which is supposed to be written by mesh +// shader execution. 
// // @param builtIn : Mesh shader built-in -// @returns : The built-in value from LDS -Value *MeshTaskShader::readMeshBuiltInFromLds(BuiltInKind builtIn) { +// @returns : The built-in output value from output allocas or LDS +Value *MeshTaskShader::readBackMeshBuiltInOutput(BuiltInKind builtIn) { const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage; - bool isPerPrimitive = (builtIn == BuiltInPrimitiveId || builtIn == BuiltInViewportIndex || builtIn == BuiltInLayer || - builtIn == BuiltInPrimitiveShadingRate); + bool primitive = (builtIn == BuiltInPrimitiveId || builtIn == BuiltInViewportIndex || builtIn == BuiltInLayer || + builtIn == BuiltInPrimitiveShadingRate); unsigned location = InvalidValue; MeshLdsRegion region = MeshLdsRegion::VertexOutput; - if (isPerPrimitive) { + if (primitive) { assert(inOutUsage.perPrimitiveBuiltInOutputLocMap.count(builtIn) > 0); location = inOutUsage.perPrimitiveBuiltInOutputLocMap[builtIn]; region = MeshLdsRegion::PrimitiveOutput; @@ -2894,27 +3343,75 @@ Value *MeshTaskShader::readMeshBuiltInFromLds(BuiltInKind builtIn) { break; } - // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + offsetInPrimOrVertex - Value *primOrVertexOffset = nullptr; - if (region == MeshLdsRegion::VertexOutput) { - primOrVertexOffset = - m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); + Value *readValue = nullptr; + + if (m_outputsLayout.outputsToAllocas) { + auto outputAlloca = getOutputAlloca(location, primitive); + readValue = m_builder.CreateAlignedLoad(readTy, outputAlloca, Align(4)); } else { - assert(region == MeshLdsRegion::PrimitiveOutput); - primOrVertexOffset = - m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); + // ldsOffset = ldsStart + primOrVertexIndex * 
primOrVertexStride + offsetInPrimOrVertex + Value *primOrVertexOffset = nullptr; + if (region == MeshLdsRegion::VertexOutput) { + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); + } else { + assert(region == MeshLdsRegion::PrimitiveOutput); + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); + } + + Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart( + region == MeshLdsRegion::PrimitiveOutput ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + Value *offsetInPrimOrVertex = + m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, region == MeshLdsRegion::PrimitiveOutput)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); + + readValue = readValueFromLds(readTy, ldsOffset); } - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart( - region == MeshLdsRegion::PrimitiveOutput ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); - Value *offsetInPrimOrVertex = - m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, region == MeshLdsRegion::PrimitiveOutput)); + return readValue; +} + +// ===================================================================================================================== +// Read back mesh shader generic output value from output allocas or LDS, which is supposed to be written by mesh +// shader execution. +// +// @param location : Output generic location +// @param primitive : Whether this is a primitive output +// @returns : The generic output value from output allocas or LDS +Value *MeshTaskShader::readBackMeshGenericOutput(unsigned location, bool primitive) { + auto &outputComponents = + primitive ? 
m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents + : m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + assert(outputComponents.count(location) > 0); // Must exist + const unsigned numComponents = outputComponents[location].first; + + Value *readValue = nullptr; + auto readTy = FixedVectorType::get(m_builder.getFloatTy(), numComponents); - auto ldsOffset = ldsStart; - ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); - ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); + if (m_outputsLayout.outputsToAllocas) { + auto outputAlloca = getOutputAlloca(location, primitive); + readValue = m_builder.CreateAlignedLoad(readTy, outputAlloca, Align(4)); + } else { + Value *ldsStart = m_builder.getInt32( + getMeshShaderLdsRegionStart(primitive ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + const unsigned primOrVertexStride = primitive ? m_outputsLayout.primitiveStride : m_outputsLayout.vertexStride; + Value *primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(primOrVertexStride)); + + auto offsetInPrimOrVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, primitive)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); + + readValue = readValueFromLds(readTy, ldsOffset); + } - return readValueFromLds(readTy, ldsOffset); + return readValue; } // ===================================================================================================================== @@ -3120,7 +3617,7 @@ void MeshTaskShader::updateMeshShaderInOutUsage() { // @param entryPoint : Entry-point of mesh shader // @returns : Value indicating whether barrier completion flag is needed bool MeshTaskShader::checkNeedBarrierFlag(Function *entryPoint) { - if 
(m_pipelineState->enableMeshRowExport()) + if (usesRowExport(m_pipelineState)) return false; // Not needed if row export is enable const auto &meshMode = m_pipelineState->getShaderModes()->getMeshShaderMode(); @@ -3171,8 +3668,9 @@ bool MeshTaskShader::checkNeedBarrierFlag(Function *entryPoint) { // // @param readTy : Type of value to read // @param ldsOffset : LDS offset in dwords +// @param alignment : Alignment of read operation (in bytes) // @returns : The Value read from LDS -Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset) { +Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset, unsigned alignment) { assert(m_lds); assert(readTy->isIntOrIntVectorTy() || readTy->isFPOrFPVectorTy()); @@ -3189,7 +3687,7 @@ Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset) { readPtr = m_builder.CreateBitCast(readPtr, PointerType::get(newReadTy, readPtr->getType()->getPointerAddressSpace())); - Value *readValue = m_builder.CreateAlignedLoad(newReadTy, readPtr, Align(4)); + Value *readValue = m_builder.CreateAlignedLoad(newReadTy, readPtr, Align(alignment)); Type *truncTy = m_builder.getIntNTy(bitWidth); if (readTy->isVectorTy()) @@ -3204,7 +3702,7 @@ Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset) { } readPtr = m_builder.CreateBitCast(readPtr, PointerType::get(readTy, readPtr->getType()->getPointerAddressSpace())); - return m_builder.CreateAlignedLoad(readTy, readPtr, Align(4)); + return m_builder.CreateAlignedLoad(readTy, readPtr, Align(alignment)); } // ===================================================================================================================== @@ -3212,7 +3710,8 @@ Value *MeshTaskShader::readValueFromLds(Type *readTy, Value *ldsOffset) { // // @param writeValue : Value to write // @param ldsOffset : LDS offset in dwords -void MeshTaskShader::writeValueToLds(Value *writeValue, Value *ldsOffset) { +// @param alignment : Alignment of write operation (in bytes) +void 
MeshTaskShader::writeValueToLds(Value *writeValue, Value *ldsOffset, unsigned alignment) { assert(m_lds); auto writeTy = writeValue->getType(); @@ -3241,13 +3740,13 @@ void MeshTaskShader::writeValueToLds(Value *writeValue, Value *ldsOffset) { writePtr = m_builder.CreateBitCast( writePtr, PointerType::get(writeValue->getType(), writePtr->getType()->getPointerAddressSpace())); - m_builder.CreateAlignedStore(writeValue, writePtr, Align(4)); + m_builder.CreateAlignedStore(writeValue, writePtr, Align(alignment)); return; } writePtr = m_builder.CreateBitCast( writePtr, PointerType::get(writeValue->getType(), writePtr->getType()->getPointerAddressSpace())); - m_builder.CreateAlignedStore(writeValue, writePtr, Align(4)); + m_builder.CreateAlignedStore(writeValue, writePtr, Align(alignment)); } // ===================================================================================================================== diff --git a/lgc/lowering/MeshTaskShader.h b/lgc/lowering/MeshTaskShader.h index c13cb84fe3..71086d30f8 100644 --- a/lgc/lowering/MeshTaskShader.h +++ b/lgc/lowering/MeshTaskShader.h @@ -49,13 +49,12 @@ struct MeshPipeStatsEntry { // Enumerates the LDS regions used by mesh shader enum class MeshLdsRegion : unsigned { - VertexCount = 0, // Vertex count set by SetMeshOutputs - PrimitiveCount, // Primitive count set by SetMeshOutputs - BarrierCompletion, // Barrier completion flag - FlatWorkgroupId, // Flat workgroup ID - PrimitiveIndices, // Primitive indices set by SetPrimitiveIndices - VertexOutput, // Per-vertex outputs - PrimitiveOutput, // Per-primitive outputsr + MeshOutputCounts = 0, // Mesh output counts (vertexCount, primitiveCount) set by SetMeshOutputs + BarrierCompletion, // Barrier completion flag + FlatWorkgroupId, // Flat workgroup ID + PrimitiveIndices, // Primitive indices set by SetPrimitiveIndices + VertexOutput, // Per-vertex outputs + PrimitiveOutput, // Per-primitive outputs }; // Map: LDS Region -> @@ -66,17 +65,22 @@ struct MeshOutputsLayout
{ std::map vertexBuiltInExports; // Map from vertex built-in output ID to export slot std::map vertexGenericExports; // Map from vertex output location to export slot // (exported as vertex attributes) - unsigned vertexExportCount; // Vertex export count + unsigned vertexExportCount = 0; // Vertex export count std::map primitiveBuiltInExports; // Map from primitive built-in output ID to export slot // (exported as primitive attributes) std::map primitiveGenericExports; // Map from primitive output location to export slot - unsigned primitiveExportCount; // Primitive export count + unsigned primitiveExportCount = 0; // Primitive export count - unsigned vertexStride; // Vertex stride (in dwords) + bool outputsToAllocas = false; // Write outputs to allocas + llvm::AllocaInst *primitiveDataAlloca = nullptr; // Primitive connectivity data alloca + std::map vertexOutputAllocas; // Map from vertex output location to output alloca + std::map primitiveOutputAllocas; // Map from primitive output location to output alloca + + unsigned vertexStride = 0; // Vertex stride (in dwords) std::map offsetsInVertex; // Map from output location to output offset within a vertex (in dwords) - unsigned primitiveStride; // Primitive stride (in dwords) + unsigned primitiveStride = 0; // Primitive stride (in dwords) std::map offsetsInPrimitive; // Map from output location to output offset within a primitive // (in dwords) }; @@ -95,6 +99,8 @@ class MeshTaskShader { private: static llvm::GlobalVariable *getOrCreateMeshLds(llvm::Module *module, unsigned meshLdsSizeInDwords = 0); static unsigned useFlatWorkgroupId(PipelineState *pipelineState); + static bool usesRowExport(PipelineState *pipelineState); + static bool meshOutputsToAllocas(PipelineState *pipelineState, llvm::Function *entryPoint); void processTaskShader(llvm::Function *entryPoint); void processMeshShader(llvm::Function *entryPoint); @@ -120,15 +126,17 @@ class MeshTaskShader { void lowerMeshShaderBody(llvm::BasicBlock 
*apiMeshEntryBlock, llvm::BasicBlock *apiMeshExitBlock); void exportPrimitive(); - void exportVertex(); + void exportPositions(); + void exportPrimitiveAttributes(); + void exportVertexAttributes(); void collectMeshStatsInfo(llvm::Function *entryPoint, llvm::Value *numMeshPrimitives); // Export kind enum class ExportKind : unsigned { - Pos = 0, - Prim = 1, - VertAttr = 2, - PrimAttr = 3, + Position = 0, + Primitive = 1, + VertexAttribute = 2, + PrimitiveAttribute = 3, }; // Export info of a single entry struct ExportInfo { @@ -146,7 +154,8 @@ class MeshTaskShader { llvm::Value *getMeshLocalInvocationIndex(); llvm::Value *getMeshGlobalInvocationId(); - llvm::Value *readMeshBuiltInFromLds(BuiltInKind builtIn); + llvm::Value *readBackMeshBuiltInOutput(BuiltInKind builtIn); + llvm::Value *readBackMeshGenericOutput(unsigned location, bool primitive); llvm::Value *convertToHwShadingRate(llvm::Value *primitiveShadingRate); void updateMeshShaderInOutUsage(); @@ -191,8 +200,21 @@ class MeshTaskShader { return InvalidValue; // Not exist } - llvm::Value *readValueFromLds(llvm::Type *readTy, llvm::Value *ldsOffset); - void writeValueToLds(llvm::Value *writeValue, llvm::Value *ldsOffset); + llvm::Value *getOutputAlloca(unsigned location, bool primitive) { + assert(m_outputsLayout.outputsToAllocas); + if (primitive) { + if (m_outputsLayout.primitiveOutputAllocas.count(location) > 0) + return m_outputsLayout.primitiveOutputAllocas[location]; + return nullptr; + } + + if (m_outputsLayout.vertexOutputAllocas.count(location) > 0) + return m_outputsLayout.vertexOutputAllocas[location]; + return nullptr; + } + + llvm::Value *readValueFromLds(llvm::Type *readTy, llvm::Value *ldsOffset, unsigned alignment = 4); + void writeValueToLds(llvm::Value *writeValue, llvm::Value *ldsOffset, unsigned alignment = 4); void atomicOpWithLds(llvm::AtomicRMWInst::BinOp atomicOp, llvm::Value *atomicValue, llvm::Value *ldsOffset); void createFenceAndBarrier(); void createBarrier(); diff --git 
a/lgc/lowering/MutateEntryPoint.cpp b/lgc/lowering/MutateEntryPoint.cpp index 005b7182ce..55a9bef594 100644 --- a/lgc/lowering/MutateEntryPoint.cpp +++ b/lgc/lowering/MutateEntryPoint.cpp @@ -56,6 +56,7 @@ #include "lgc/lowering/MutateEntryPoint.h" #include "ShaderMerger.h" #include "compilerutils/CompilerUtils.h" +#include "llpc/GpurtEnums.h" #include "llvmraytracing/ContinuationsUtil.h" #include "lgc/LgcContext.h" #include "lgc/LgcCpsDialect.h" @@ -87,12 +88,22 @@ using namespace llvm; using namespace lgc; using namespace cps; +static cl::opt UseInitWholeWave("lgc-use-init-whole-wave", + cl::desc("Use the llvm.amdgcn.init.whole.wave intrinsic"), cl::init(false)); + +// ===================================================================================================================== +bool MutateEntryPoint::useInitWholeWave() const { + return UseInitWholeWave && m_initWholeWaveId != llvm::Intrinsic::not_intrinsic; +} + // ===================================================================================================================== MutateEntryPoint::MutateEntryPoint() : m_hasTs(false), m_hasGs(false) { #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 513481 m_setInactiveChainArgId = Function::lookupIntrinsicID("llvm.amdgcn.set.inactive.chain.arg"); + m_initWholeWaveId = Function::lookupIntrinsicID("llvm.amdgcn.init.whole.wave"); #else m_setInactiveChainArgId = Intrinsic::lookupIntrinsicID("llvm.amdgcn.set.inactive.chain.arg"); + m_initWholeWaveId = Intrinsic::lookupIntrinsicID("llvm.amdgcn.init.whole.wave"); #endif } @@ -494,15 +505,13 @@ void MutateEntryPoint::lowerAsCpsReference(cps::AsContinuationReferenceOp &asCps // continuation transform, under which we still need to pass ShaderInput arguments(WorkgroupId/LocalInvocationId) during // cps chain call. 
bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { - SmallVector cpsJumps; - SmallVector tobeErased; - struct Payload { - SmallVectorImpl &jumps; - SmallVectorImpl &tobeErased; - MutateEntryPoint *self; + SmallVector jumps; + SmallVector tobeErased; + MutateEntryPoint *self = nullptr; }; - Payload payload = {cpsJumps, tobeErased, this}; + Payload payload; + payload.self = this; static auto visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) @@ -514,11 +523,11 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { .build(); visitor.visit(payload, *func); - for (auto *call : tobeErased) + for (auto *call : payload.tobeErased) call->eraseFromParent(); bool isCpsFunc = cps::isCpsFunction(*func); - if (!isCpsFunc && cpsJumps.empty()) + if (!isCpsFunc && payload.jumps.empty()) return false; // Get the number of user-data arguments. @@ -550,8 +559,44 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { SmallVector exitInfos; IRBuilder<> builder(func->getContext()); + // If init.whole.wave is available, generate a new entry block to initialize the whole wave: + // entry.block: + // %orig.exec = llvm.amdgcn.init.whole.wave() + // br %orig.exec, %func, %tail.block + // func: + // ... + // br %tail.block + // tail.block: + // ... + bool useIWW = useInitWholeWave(); + if (isCpsFunc && useIWW) { + auto *entryBlock = &func->getEntryBlock(); + BasicBlock *shaderBlock = entryBlock->splitBasicBlock(entryBlock->getFirstNonPHIOrDbgOrAlloca()); + builder.SetInsertPoint(entryBlock, entryBlock->getFirstNonPHIOrDbgOrAlloca()); + + // For the extra VGPR args, we'll have to preserve the values in the inactive + // lanes. This is achieved by adding the original values to Phi nodes in the + // tail block - but first we will have to split them into i32. Do this in + // the entry block, before inserting the init.whole.wave intrinsic. 
+ SmallVector remainingArgs; + for (Argument &arg : drop_begin(func->args(), numShaderArg)) + remainingArgs.push_back(&arg); + + SmallVector vgprArgs; + splitIntoI32(func->getParent()->getDataLayout(), builder, remainingArgs, vgprArgs); + + exitInfos.push_back(CpsExitInfo(entryBlock, std::move(vgprArgs))); + + // Now we can finally insert the init.whole.wave intrinsic. + auto *originalExec = builder.CreateIntrinsic(builder.getInt1Ty(), m_initWholeWaveId, {}); + builder.CreateCondBr(originalExec, shaderBlock, tailBlock); + + // Remove the unconditional branch inserted by splitBB(). + entryBlock->getTerminator()->eraseFromParent(); + } + // Lower cps jumps. - for (auto *jump : cpsJumps) + for (auto *jump : payload.jumps) lowerCpsJump(func, jump, tailBlock, exitInfos); // Lower returns. @@ -620,26 +665,30 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { // it and call it. LLVM will misrecognize it as llvm.amdgcn.set.inactive, and lit-test would just fail. So here we // just call llvm.amdgcn.set.inactive to pass compilation and lit-test if no *set.inactive.chain.arg support. // TODO: Cleanup this when the related LLVM versions have the intrinsic definition. 
- if (m_setInactiveChainArgId != Intrinsic::not_intrinsic) - vcr = builder.CreateIntrinsic(vcrTy, m_setInactiveChainArgId, {vcr, vcrShaderArg}); - else - vcr = builder.CreateIntrinsic(vcrTy, Intrinsic::amdgcn_set_inactive, {vcr, vcrShaderArg}); + if (!useIWW) { + if (m_setInactiveChainArgId != Intrinsic::not_intrinsic) + vcr = builder.CreateIntrinsic(vcrTy, m_setInactiveChainArgId, {vcr, vcrShaderArg}); + else + vcr = builder.CreateIntrinsic(vcrTy, Intrinsic::amdgcn_set_inactive, {vcr, vcrShaderArg}); + } auto level = builder.CreateAnd(vcr, builder.getInt32(0x7)); auto funcLevel = static_cast(cps::getCpsLevelFromFunction(*func)); - static const std::vector priorities[] = { + static const std::vector priorities[] = { // RayGen: Continue with RayGen or hit shaders - {CpsLevel::Traversal, CpsLevel::ClosestHit_Miss_Callable, CpsLevel::RayGen}, + {CpsSchedulingLevel::Traversal, CpsSchedulingLevel::ClosestHit_Miss_Callable, CpsSchedulingLevel::RayGen}, // ClosestHit_Miss_Callable: Continue with hit shaders, then resume RayGen - {CpsLevel::Traversal, CpsLevel::RayGen, CpsLevel::ClosestHit_Miss_Callable}, + {CpsSchedulingLevel::Traversal, CpsSchedulingLevel::RayGen, CpsSchedulingLevel::ClosestHit_Miss_Callable}, // Traversal: Call Intersection or AnyHit, then call hit shaders or continue with RayGen // Traversal can continue with traversal when it wants to wait, so try that last - {CpsLevel::Traversal, CpsLevel::RayGen, CpsLevel::ClosestHit_Miss_Callable, - CpsLevel::AnyHit_CombinedIntersection_AnyHit, CpsLevel::Intersection}, + {CpsSchedulingLevel::Traversal, CpsSchedulingLevel::RayGen, CpsSchedulingLevel::ClosestHit_Miss_Callable, + CpsSchedulingLevel::AnyHit_CombinedIntersection_AnyHit, CpsSchedulingLevel::Intersection}, // AnyHit_CombinedIntersection_AnyHit: Continue with AnyHit, then resume Traversal - {CpsLevel::Traversal, CpsLevel::Intersection, CpsLevel::AnyHit_CombinedIntersection_AnyHit}, + {CpsSchedulingLevel::Traversal, CpsSchedulingLevel::Intersection, + 
CpsSchedulingLevel::AnyHit_CombinedIntersection_AnyHit}, // Intersection: Continue with Intersection, then resume Traversal - {CpsLevel::Traversal, CpsLevel::AnyHit_CombinedIntersection_AnyHit, CpsLevel::Intersection}}; + {CpsSchedulingLevel::Traversal, CpsSchedulingLevel::AnyHit_CombinedIntersection_AnyHit, + CpsSchedulingLevel::Intersection}}; // Get non-zero level execution Mask pendingBallot = takeLevel(level, builder, waveMaskTy, priorities[funcLevel - 1]); } else { @@ -656,7 +705,7 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { auto *targetMask = builder.CreateICmpEQ(vcr, targetVcr); auto *execMask = builder.CreateIntrinsic(Intrinsic::amdgcn_ballot, waveMaskTy, targetMask); - if (isCpsFunc) { + if (isCpsFunc && !useIWW) { targetVcr = builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wwm, targetVcr); execMask = builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wwm, execMask); } @@ -678,7 +727,10 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { builder.SetInsertPoint(chainBlock); // Mask off metadata bits and setup jump target. Value *addr32 = builder.CreateAnd(targetVcr, builder.getInt32(~0x3fu)); - AddressExtender addressExtender(func); + // Insert jumpTarget computations in the tailBlock, since that is closer to where they will be used. + // These operations are expected to only use SGPRs, so it should be safe to run with or without all lanes + // enabled (i.e. regardless of useIWW's value). 
+ AddressExtender addressExtender(func, tailBlock); Value *jumpTarget = addressExtender.extend(addr32, builder.getInt32(HighAddrPc), builder.getPtrTy(), builder); const DataLayout &layout = func->getParent()->getDataLayout(); @@ -728,11 +780,38 @@ bool MutateEntryPoint::lowerCpsOps(Function *func, ShaderInputs *shaderInputs) { Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fixedShaderArgTys, ArrayRef argNames) { IRBuilder<> builder(func->getContext()); + AttributeList oldAttrs = func->getAttributes(); + SmallVector newArgTys; newArgTys.append(fixedShaderArgTys.begin(), fixedShaderArgTys.end()); newArgTys.push_back(builder.getInt32Ty()); auto remainingArgs = func->getFunctionType()->params(); newArgTys.append(remainingArgs.begin(), remainingArgs.end()); + + // If init.whole.wave is available, we need to pad the argument list up to the maximum number of VGPRs used for this + // pipeline, so that we can preserve the inactive lanes for these VGPRs. + int numInactiveVgprs = 0; + bool useIWW = useInitWholeWave(); + if (useIWW) { + SmallVector remainingVgprArgs; + for (unsigned idx = 0; idx < remainingArgs.size(); ++idx) + if (!oldAttrs.getParamAttrs(idx).hasAttribute(Attribute::InReg)) + remainingVgprArgs.push_back(remainingArgs[idx]); + + const DataLayout &layout = func->getParent()->getDataLayout(); + std::optional argBound = lgc::cps::getMaxArgumentVgprs(*func->getParent()); + if (!argBound.has_value()) + report_fatal_error("Missing lgc.cps.maxArgumentVgprs metadata"); + + numInactiveVgprs = *argBound - lgc::cps::getArgumentDwordCount(layout, remainingVgprArgs); + + if (numInactiveVgprs < 0) + report_fatal_error("Invalid number of inactive VGPRs, check lgc.cps.maxArgumentVgprs"); + + for (int i = 0; i < numInactiveVgprs; ++i) + newArgTys.push_back(builder.getInt32Ty()); + } + FunctionType *newFuncTy = FunctionType::get(builder.getVoidTy(), newArgTys, false); auto newFunc = createFunctionHelper(newFuncTy, func->getLinkage(), func->getParent()); 
newFunc->copyAttributesFrom(func); @@ -749,7 +828,6 @@ Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fi assert(haveLocalInvocationId == (argNames.back() == "LocalInvocationId") || (argNames[argNames.size() - 2] == "LocalInvocationId")); - AttributeList oldAttrs = func->getAttributes(); SmallVector argAttrs; unsigned numUserdataArg = haveLocalInvocationId ? fixedShaderArgTys.size() - 1 : fixedShaderArgTys.size(); for (unsigned idx = 0; idx != numUserdataArg; ++idx) @@ -787,6 +865,12 @@ Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fi newArg->setName(oldArg->getName()); oldArg->replaceAllUsesWith(newArg); } + + if (useIWW) { + for (unsigned idx = newFunc->arg_size() - numInactiveVgprs; idx < newFunc->arg_size(); idx++) + newFunc->getArg(idx)->setName("inactive.vgpr"); + } + setShaderStage(newFunc, getShaderStage(func)); newFunc->setAlignment(Align(64)); newFunc->setCallingConv(CallingConv::AMDGPU_CS_Chain); @@ -801,7 +885,7 @@ Function *MutateEntryPoint::lowerCpsFunction(Function *func, ArrayRef fi // @param waveMaskTy : Wave Mask type // @param priorities : Priorities list Value *MutateEntryPoint::takeLevel(Value *level, IRBuilder<> &builder, Type *waveMaskTy, - ArrayRef priorities) { + ArrayRef priorities) { auto levelMask = builder.CreateICmpNE(level, builder.getInt32(0)); Value *levelBallot = builder.CreateIntrinsic(Intrinsic::amdgcn_ballot, waveMaskTy, levelMask); Value *cond = nullptr; diff --git a/lgc/lowering/NggPrimShader.cpp b/lgc/lowering/NggPrimShader.cpp index c56f76c6cf..17a0f1e70a 100644 --- a/lgc/lowering/NggPrimShader.cpp +++ b/lgc/lowering/NggPrimShader.cpp @@ -59,8 +59,8 @@ namespace lgc { // List of names of handler functions static const char NggEsMain[] = "lgc.ngg.ES.main"; -static const char NggEsCullDataFetcher[] = "lgc.ngg.ES.cull.data.fetcher"; -static const char NggEsVertexExporter[] = "lgc.ngg.ES.vertex.exporter"; +static const char NggEsFirstPart[] = "lgc.ngg.ES.first.part"; +static const 
char NggEsSecondPart[] = "lgc.ngg.ES.second.part"; static const char NggGsMain[] = "lgc.ngg.GS.main"; static const char NggCopyShader[] = "lgc.ngg.COPY.main"; @@ -75,7 +75,7 @@ static const char NggCullerSmallPrimFilter[] = "lgc.ngg.culler.small.prim.filter static const char NggCullerCullDistance[] = "lgc.ngg.culler.cull.distance"; static const char NggCullerRegFetcher[] = "lgc.ngg.culler.reg.fetcher"; -static const char NggXfbFetcher[] = "lgc.ngg.xfb.fetcher"; +static const char NggExportCollector[] = "lgc.ngg.export.collector"; // Represents GDS GRBM register for SW-emulated stream-out enum { @@ -408,8 +408,8 @@ PrimShaderLdsUsageInfo NggPrimShader::layoutPrimShaderLds(PipelineState *pipelin // XFB outputs if (pipelineState->enableSwXfb()) { if (ldsLayout) { - ldsRegionSize = hwConfig.esVertsPerSubgroup * - hwConfig.esGsRingItemSize; // Transform feedback outputs are stored as a ES-GS ring item + ldsRegionSize = + hwConfig.esVertsPerSubgroup * hwConfig.esGsRingItemSize; // XFB outputs are stored as a ES-GS ring item printLdsRegionInfo("XFB Outputs", ldsOffset, ldsRegionSize); (*ldsLayout)[PrimShaderLdsRegion::XfbOutput] = std::make_pair(ldsOffset, ldsRegionSize); @@ -759,14 +759,14 @@ unsigned NggPrimShader::calcVertexCullInfoSizeAndOffsets(PipelineState *pipeline } // ===================================================================================================================== -// Calculate and return the dword size of total transform feedback outputs to write for the ES stage. +// Calculate and return the dword size of total XFB outputs to write for the ES stage. // // NOTE: For non 64-bit output, the value is its element count (8-bit/16-bit scalars are padded to 32-bit); for 64-bit // output, the value is doubled since each 64-bit scalar is split to two dwords to write. This info is used by ES (VS // or TES in non-GS pipeline) to write the outputs to NGG LDS space on GFX11+ to do SW emulated stream-out. 
// // @param esMain : ES main function -// @returns : Dword size of total transform feedback outputs to write +// @returns : Dword size of total XFB outputs to write unsigned NggPrimShader::calcEsXfbOutputsSize(Function *esMain) { unsigned xfbOutputsSize = 0; @@ -987,18 +987,19 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { // if (waveId == 0) // Send GS_ALLOC_REQ message // - // if (threadIdInSubgroup < primCountInSubgroup) - // Export primitive + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect exports // // if (Enable SW XFB) - // Process SW XFB (Run ES) - // else { - // if (Enable primitive statistics counting) - // Collect primitive statistics + // Process SW XFB + // else if (Enable primitive statistics counting) + // Collect primitive statistics // - // if (threadIdInSubgroup < vertCountInSubgroup) - // Run ES (export vertex) - // } + // if (threadIdInSubgroup < primCountInSubgroup) + // Export primitive + // + // if (threadIdInSubgroup < vertCountInSubgroup) + // Export vertex // } // @@ -1008,6 +1009,9 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { auto sendGsAllocReqBlock = createBlock(primShader, ".sendGsAllocReq"); auto endSendGsAllocReqBlock = createBlock(primShader, ".endSendGsAllocReq"); + auto collectExportBlock = createBlock(primShader, ".collectExport"); + auto endCollectExportBlock = createBlock(primShader, ".endCollectExport"); + auto exportPrimitiveBlock = createBlock(primShader, ".exportPrimitive"); auto endExportPrimitiveBlock = createBlock(primShader, ".endExportPrimitive"); @@ -1079,6 +1083,33 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { { m_builder.SetInsertPoint(endSendGsAllocReqBlock); + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + m_builder.CreateCondBr(validVertex, collectExportBlock, endCollectExportBlock); + } + + // Construct ".collectExport" block + SmallVector 
positionExports; + SmallVector attributeExports; + SmallVector xfbExports; + { + m_builder.SetInsertPoint(collectExportBlock); + + collectExports(args, m_esHandlers.main, false, &positionExports, &attributeExports, &xfbExports); + + m_builder.CreateBr(endCollectExportBlock); + } + + // Construct ".endCollectExport" block + { + m_builder.SetInsertPoint(endCollectExportBlock); + + createPhiForExports(&positionExports, &attributeExports, &xfbExports); + + if (m_pipelineState->enableSwXfb()) + processSwXfb(args, xfbExports); + else if (m_pipelineState->enablePrimStats()) + collectPrimitiveStats(); + auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.primCountInSubgroup); m_builder.CreateCondBr(validPrimitive, exportPrimitiveBlock, endExportPrimitiveBlock); } @@ -1095,12 +1126,6 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { { m_builder.SetInsertPoint(endExportPrimitiveBlock); - // Process SW XFB or primitive statistics counting - if (m_pipelineState->enableSwXfb()) - processSwXfb(args); - else if (m_pipelineState->enablePrimStats()) - collectPrimitiveStats(); - auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); m_builder.CreateCondBr(validVertex, exportVertexBlock, endExportVertexBlock); } @@ -1109,20 +1134,17 @@ void NggPrimShader::buildPassthroughPrimShader(Function *primShader) { { m_builder.SetInsertPoint(exportVertexBlock); - // NOTE: For NGG passthrough mode, if SW-emulated stream-out is enabled, running ES is included in processing - // transform feedback exporting. There won't be separated ES running (ES is not split any more). This is - // because we could encounter special cases in which there are memory atomics producing output values both for - // transform feedback exporting and for vertex exporting like following codes. The atomics shouldn't be separated - // and be run multiple times. - // - // void ES() { - // ... 
- // value = atomicXXX() - // xfbExport = value - // vertexExport = value - // } - if (!m_pipelineState->enableSwXfb()) - runEs(args); + // NOTE: If the workaround of attributes-through-memory preceding vertex position data is required, we have to + // place vertex exports after all attribute exports (ATM operations). + if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos) { + exportAttributes(attributeExports); + if (!attributeExports.empty()) + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); + exportPositions(positionExports); + } else { + exportPositions(positionExports); + exportAttributes(attributeExports); + } m_builder.CreateBr(endExportVertexBlock); } @@ -1218,13 +1240,16 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // Barrier // } // + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect XFB exports + // // if (Enable SW XFB) // Process SW XFB // else if (Enable primitive statistics counting) // Collect primitive statistics // - // if (threadIdInWave < vertCountInWave) - // Run part ES to fetch vertex cull data + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect cull data // // if (Not runtime passthrough) { // if (threadIdInSubgroup < vertCountInSubgroup) @@ -1232,8 +1257,8 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // if (threadIdInSubgroup < maxWaves + 1) // Initialize per-wave and per-subgroup count of output vertices // - // if (threadIdInWave < vertCountInWave) - // Write vertex cull data + // if (threadIdInSubgroup < vertCountInSubgroup) + // Write cull data // Barrier // // if (threadIdInSubgroup < primCountInSubgroup) { @@ -1266,14 +1291,17 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // return (early exit) // } // + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect vertex exports + // // if (threadIdInSubgroup < primCountInSubgroup) // Export primitive // // if 
(threadIdInSubgroup < vertCountInSubgroup) { // if (Needn't compact vertex && empty wave) // Dummy vertex export - // else - // Run part ES to do deferred vertex export + // else if (drawFlag) + // Export vertex // } // } // @@ -1295,9 +1323,11 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // Define basic blocks auto entryBlock = createBlock(primShader, ".entry"); - auto checkFetchVertexCullDataBlock = createBlock(primShader, ".checkFetchVertexCullData"); - auto fetchVertexCullDataBlock = createBlock(primShader, ".fetchVertexCullData"); - auto endFetchVertexCullDataBlock = createBlock(primShader, ".endFetchVertexCullData"); + auto collectXfbExportBlock = createBlock(primShader, ".collectXfbExport"); + auto endCollectXfbExportBlock = createBlock(primShader, ".endCollectXfbExport"); + + auto collectCullDataBlock = createBlock(primShader, ".collectCullData"); + auto endCollectCullDataBlock = createBlock(primShader, ".endCollectCullData"); auto checkInitVertexDrawFlagBlock = createBlock(primShader, ".checkInitVertexDrawFlag"); auto initVertexDrawFlagBlock = createBlock(primShader, ".initVertexDrawFlag"); @@ -1306,8 +1336,8 @@ void NggPrimShader::buildPrimShader(Function *primShader) { auto initVertexCountsBlock = createBlock(primShader, ".initVertexCounts"); auto endInitVertexCountsBlock = createBlock(primShader, ".endInitVertexCounts"); - auto writeVertexCullDataBlock = createBlock(primShader, ".writeVertexCullData"); - auto endWriteVertexCullDataBlock = createBlock(primShader, ".endWriteVertexCullData"); + auto writeCullDataBlock = createBlock(primShader, ".writeCullData"); + auto endWriteCullDataBlock = createBlock(primShader, ".endWriteCullData"); auto cullPrimitiveBlock = createBlock(primShader, ".cullPrimitive"); auto writeVertexDrawFlagBlock = createBlock(primShader, ".writeVertexDrawFlag"); @@ -1327,7 +1357,10 @@ void NggPrimShader::buildPrimShader(Function *primShader) { auto endSendGsAllocReqBlock = createBlock(primShader, 
".endSendGsAllocReq"); auto earlyExitBlock = createBlock(primShader, ".earlyExit"); - auto checkExportPrimitiveBlock = createBlock(primShader, ".checkExportPrimitive"); + auto checkCollectVertexExportBlock = createBlock(primShader, ".checkCollectVertexExport"); + + auto collectVertexExportBlock = createBlock(primShader, ".collectVertexExport"); + auto endCollectVertexExportBlock = createBlock(primShader, ".endCollectVertexExport"); auto exportPrimitiveBlock = createBlock(primShader, ".exportPrimitive"); auto endExportPrimitiveBlock = createBlock(primShader, ".endExportPrimitive"); @@ -1371,46 +1404,109 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // Distribute primitive ID if needed distributePrimitiveId(primitiveId); - // Process SW XFB or primitive statistics counting + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + m_builder.CreateCondBr(validVertex, collectXfbExportBlock, endCollectXfbExportBlock); + } + + // Construct ".collectXfbExport" block + SmallVector xfbExports; + { + m_builder.SetInsertPoint(collectXfbExportBlock); + if (m_pipelineState->enableSwXfb()) - processSwXfb(args); - else if (m_pipelineState->enablePrimStats()) - collectPrimitiveStats(); + collectExports(args, m_esHandlers.main, true, nullptr, nullptr, &xfbExports); - m_builder.CreateBr(checkFetchVertexCullDataBlock); + m_builder.CreateBr(endCollectXfbExportBlock); } - // Construct ".checkFetchVertexCullData" block + // Construct ".endCollectXfbExport" block { - m_builder.SetInsertPoint(checkFetchVertexCullDataBlock); + m_builder.SetInsertPoint(endCollectXfbExportBlock); - auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInWave, m_nggInputs.vertCountInWave); - m_builder.CreateCondBr(validVertex, fetchVertexCullDataBlock, endFetchVertexCullDataBlock); + if (m_pipelineState->enableSwXfb()) { + createPhiForExports(nullptr, nullptr, &xfbExports); + processSwXfb(args, xfbExports); + } else if 
(m_pipelineState->enablePrimStats()) { + collectPrimitiveStats(); + } + + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + m_builder.CreateCondBr(validVertex, collectCullDataBlock, endCollectCullDataBlock); } - // Construct ".fetchVertexCullData" block - Value *cullData = nullptr; - Value *position = nullptr; + // Construct ".collectCullData" block + SmallVector positionExports; { - m_builder.SetInsertPoint(fetchVertexCullDataBlock); + m_builder.SetInsertPoint(collectCullDataBlock); - // Split ES to two parts: fetch cull data before NGG culling; do deferred vertex export after NGG culling + // Split ES to two parts: cull data exports and other remaining exports splitEs(); - // Run part ES to fetch cull data - auto cullData = runPartEs(args); - position = m_nggControl->enableCullDistanceCulling ? m_builder.CreateExtractValue(cullData, 0) : cullData; + // Collect cull data exports + collectExports(args, m_esHandlers.part.first, false, &positionExports, nullptr, nullptr); - m_builder.CreateBr(endFetchVertexCullDataBlock); + m_builder.CreateBr(endCollectCullDataBlock); } - // Construct ".endFetchVertexCullData" block + // Construct ".endCollectCullData" block + Value *position0 = nullptr; + SmallVector cullDistance; { - m_builder.SetInsertPoint(endFetchVertexCullDataBlock); + m_builder.SetInsertPoint(endCollectCullDataBlock); + + createPhiForExports(&positionExports, nullptr, nullptr); + + unsigned clipCullExportSlot = 1; + unsigned clipDistanceCount = 0; + unsigned cullDistanceCount = 0; + if (m_nggControl->enableCullDistanceCulling) { + const auto &resUsage = + m_pipelineState->getShaderResourceUsage(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex); + + if (m_hasTes) { + const auto &builtInUsage = resUsage->builtInUsage.tes; + + const bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex; + clipCullExportSlot = miscExport ? 
2 : 1; + clipDistanceCount = builtInUsage.clipDistance; + cullDistanceCount = builtInUsage.cullDistance; + } else { + const auto &builtInUsage = resUsage->builtInUsage.vs; - position = createPhi( - {{position, fetchVertexCullDataBlock}, {PoisonValue::get(position->getType()), checkFetchVertexCullDataBlock}}, - "position"); // Update vertex position data + const bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex || + builtInUsage.primitiveShadingRate; + clipCullExportSlot = miscExport ? 2 : 1; + clipDistanceCount = builtInUsage.clipDistance; + cullDistanceCount = builtInUsage.cullDistance; + } + + assert(cullDistanceCount > 0); // Cull distance must exist if the culling is enabled + } + + SmallVector clipCullDistance; + for (auto &positionExport : positionExports) { + if (positionExport.exportSlot == 0) { + position0 = positionExport.exportValue; + position0->setName("position0"); + } else { + if (m_nggControl->enableCullDistanceCulling) { + if (positionExport.exportSlot == clipCullExportSlot) { + for (unsigned i = 0; i < 4; i++) + clipCullDistance[i] = m_builder.CreateExtractElement(positionExport.exportValue, i); + } else if (positionExport.exportSlot == clipCullExportSlot + 1 && clipDistanceCount + cullDistanceCount > 4) { + for (unsigned i = 0; i < 4; i++) + clipCullDistance[4 + i] = m_builder.CreateExtractElement(positionExport.exportValue, i); + } + } + } + } + + if (m_nggControl->enableCullDistanceCulling) { + for (unsigned i = 0; i < cullDistanceCount; ++i) { + cullDistance.push_back(clipCullDistance[clipDistanceCount + i]); + cullDistance[i]->setName("cullDistance" + std::to_string(i)); + } + } // NOTE: If the Z channel of vertex position data is constant, we can go into runtime passthrough mode. Otherwise, // we will further check if this is a small subgroup and enable runtime passthrough mode accordingly. 
@@ -1460,27 +1556,22 @@ void NggPrimShader::buildPrimShader(Function *primShader) { m_builder.SetInsertPoint(endInitVertexCountsBlock); auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInWave, m_nggInputs.vertCountInWave); - m_builder.CreateCondBr(validVertex, writeVertexCullDataBlock, endWriteVertexCullDataBlock); + m_builder.CreateCondBr(validVertex, writeCullDataBlock, endWriteCullDataBlock); } - // Construct ".writeVertexCullData" block + // Construct ".writeCullData" block { - m_builder.SetInsertPoint(writeVertexCullDataBlock); + m_builder.SetInsertPoint(writeCullDataBlock); // Write vertex position data - writePerThreadDataToLds(position, m_nggInputs.threadIdInSubgroup, PrimShaderLdsRegion::VertexPosition, 0, true); + writePerThreadDataToLds(position0, m_nggInputs.threadIdInSubgroup, PrimShaderLdsRegion::VertexPosition, 0, true); // Write cull distance sign mask if (m_nggControl->enableCullDistanceCulling) { - auto cullDistance = m_builder.CreateExtractValue(cullData, 1); - // Calculate the sign mask for cull distance Value *signMask = m_builder.getInt32(0); - for (unsigned i = 0; i < cullDistance->getType()->getArrayNumElements(); ++i) { - auto cullDistanceVal = m_builder.CreateExtractValue(cullDistance, i); - cullDistanceVal = m_builder.CreateBitCast(cullDistanceVal, m_builder.getInt32Ty()); - - Value *signBit = createUBfe(cullDistanceVal, 31, 1); + for (unsigned i = 0; i < cullDistance.size(); ++i) { + Value *signBit = createUBfe(m_builder.CreateBitCast(cullDistance[i], m_builder.getInt32Ty()), 31, 1); signBit = m_builder.CreateShl(signBit, i); signMask = m_builder.CreateOr(signMask, signBit); @@ -1489,12 +1580,12 @@ void NggPrimShader::buildPrimShader(Function *primShader) { writeVertexCullInfoToLds(signMask, vertexItemOffset, m_vertCullInfoOffsets.cullDistanceSignMask); } - m_builder.CreateBr(endWriteVertexCullDataBlock); + m_builder.CreateBr(endWriteCullDataBlock); } - // Construct ".endWriteVertexCullData" block + // Construct 
".endWriteCullData" block { - m_builder.SetInsertPoint(endWriteVertexCullDataBlock); + m_builder.SetInsertPoint(endWriteCullDataBlock); createFenceAndBarrier(); @@ -1532,7 +1623,7 @@ void NggPrimShader::buildPrimShader(Function *primShader) { primitiveCulled = createPhi({{m_builder.getTrue(), cullPrimitiveBlock}, {m_builder.getFalse(), writeVertexDrawFlagBlock}, - {m_builder.getTrue(), endWriteVertexCullDataBlock}}); + {m_builder.getTrue(), endWriteCullDataBlock}}); createFenceAndBarrier(); @@ -1718,40 +1809,39 @@ void NggPrimShader::buildPrimShader(Function *primShader) { // and no runtime passthrough path (normal culling path). if (m_nggControl->compactVertex) { m_compactVertex = - createPhi({{hasCulledVertices, endCompactVertexBlock}, {m_builder.getFalse(), endFetchVertexCullDataBlock}}, + createPhi({{hasCulledVertices, endCompactVertexBlock}, {m_builder.getFalse(), endCollectCullDataBlock}}, "compactVertex"); } else { assert(!m_compactVertex); // Must be null } // Update primitive culled flag - primitiveCulled = - createPhi({{primitiveCulled, endCompactVertexBlock}, {m_builder.getFalse(), endFetchVertexCullDataBlock}}, - "primitiveCulled"); + primitiveCulled = createPhi( + {{primitiveCulled, endCompactVertexBlock}, {m_builder.getFalse(), endCollectCullDataBlock}}, "primitiveCulled"); // Update fully-culled flag - fullyCulled = createPhi({{fullyCulled, endCompactVertexBlock}, {m_builder.getFalse(), endFetchVertexCullDataBlock}}, + fullyCulled = createPhi({{fullyCulled, endCompactVertexBlock}, {m_builder.getFalse(), endCollectCullDataBlock}}, "fullyCulled"); // Update primitive count in subgroup m_nggInputs.primCountInSubgroup = createPhi( - {{primCountInSubgroup, endCompactVertexBlock}, {m_nggInputs.primCountInSubgroup, endFetchVertexCullDataBlock}}, + {{primCountInSubgroup, endCompactVertexBlock}, {m_nggInputs.primCountInSubgroup, endCollectCullDataBlock}}, "primCountInSubgroup"); // Update vertex count in subgroup m_nggInputs.vertCountInSubgroup = createPhi( - 
{{vertCountInSubgroup, endCompactVertexBlock}, {m_nggInputs.vertCountInSubgroup, endFetchVertexCullDataBlock}}, + {{vertCountInSubgroup, endCompactVertexBlock}, {m_nggInputs.vertCountInSubgroup, endCollectCullDataBlock}}, "vertCountInSubgroup"); if (!m_nggControl->compactVertex) { // Update draw flag - drawFlag = createPhi({{drawFlag, endCompactVertexBlock}, {m_builder.getTrue(), endFetchVertexCullDataBlock}}, - "drawFlag"); + drawFlag = + createPhi({{drawFlag, endCompactVertexBlock}, {m_builder.getTrue(), endCollectCullDataBlock}}, "drawFlag"); // Update vertex count in wave - vertCountInWave = createPhi( - {{vertCountInWave, endCompactVertexBlock}, {m_nggInputs.vertCountInWave, endFetchVertexCullDataBlock}}, - "vertCountInWave"); + vertCountInWave = + createPhi({{vertCountInWave, endCompactVertexBlock}, {m_nggInputs.vertCountInWave, endCollectCullDataBlock}}, + "vertCountInWave"); } auto firstWaveInSubgroup = m_builder.CreateICmpEQ(m_nggInputs.waveIdInSubgroup, m_builder.getInt32(0)); @@ -1773,9 +1863,9 @@ void NggPrimShader::buildPrimShader(Function *primShader) { createFenceAndBarrier(); if (waNggCullingNoEmptySubgroups) - m_builder.CreateCondBr(fullyCulled, earlyExitBlock, checkExportPrimitiveBlock); + m_builder.CreateCondBr(fullyCulled, earlyExitBlock, checkCollectVertexExportBlock); else - m_builder.CreateBr(checkExportPrimitiveBlock); + m_builder.CreateBr(checkCollectVertexExportBlock); } if (waNggCullingNoEmptySubgroups) { @@ -1796,9 +1886,55 @@ void NggPrimShader::buildPrimShader(Function *primShader) { } } - // Construct ".checkExportPrimitive" block + // Construct ".checkCollectVertexExport" block { - m_builder.SetInsertPoint(checkExportPrimitiveBlock); + m_builder.SetInsertPoint(checkCollectVertexExportBlock); + + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + if (!m_nggControl->compactVertex) + validVertex = m_builder.CreateAnd(validVertex, drawFlag); // Culled vertex vertices will not be 
drawn + m_builder.CreateCondBr(validVertex, collectVertexExportBlock, endCollectVertexExportBlock); + } + + // Construct ".collectVertexExport" block + positionExports.clear(); // Will be reused, clear it + SmallVector attributeExports; + { + m_builder.SetInsertPoint(collectVertexExportBlock); + + collectExports(args, m_esHandlers.part.second, false, &positionExports, &attributeExports, nullptr); + + // NOTE: After ES splitting, position0 is not contained in the ES second part. We have to insert it back to the + // collection of position exports. + if (m_compactVertex) { + auto uncompactedVertexIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::VertexIndexMap); + auto newPosition0 = + readPerThreadDataFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), uncompactedVertexIndex, + PrimShaderLdsRegion::VertexPosition, 0, true); + position0 = m_builder.CreateSelect(m_compactVertex, newPosition0, position0); + } + + m_builder.CreateBr(endCollectVertexExportBlock); + } + + // Construct ".endCollectVertexExport" block + { + m_builder.SetInsertPoint(endCollectVertexExportBlock); + + createPhiForExports(&positionExports, &attributeExports, nullptr); + + if (m_compactVertex) { + auto exportBlock = cast(position0)->getParent(); + auto position0Phi = + m_builder.CreatePHI(position0->getType(), pred_size(endCollectVertexExportBlock), "position0"); + for (BasicBlock *predBlock : predecessors(endCollectVertexExportBlock)) { + position0Phi->addIncoming(predBlock == exportBlock ? 
position0 : PoisonValue::get(position0->getType()), + predBlock); + } + position0 = position0Phi; + } + positionExports.push_back({0, 0xF, position0}); auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.primCountInSubgroup); m_builder.CreateCondBr(validPrimitive, exportPrimitiveBlock, endExportPrimitiveBlock); @@ -1869,9 +2005,9 @@ void NggPrimShader::buildPrimShader(Function *primShader) { { m_builder.SetInsertPoint(checkExportVertexBlock); - auto validVertex = m_nggControl->compactVertex - ? m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup) - : drawFlag; + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + if (!m_nggControl->compactVertex) + validVertex = m_builder.CreateAnd(validVertex, drawFlag); // Culled vertex vertices will not be drawn m_builder.CreateCondBr(validVertex, exportVertexBlock, endExportVertexBlock); } @@ -1879,8 +2015,17 @@ void NggPrimShader::buildPrimShader(Function *primShader) { { m_builder.SetInsertPoint(exportVertexBlock); - // Run part ES to do deferred vertex export - runPartEs(args, position); + // NOTE: If the workaround of attributes-through-memory preceding vertex position data is required, we have to + // place vertex exports after all attribute exports (ATM operations). 
+ if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos) { + exportAttributes(attributeExports); + if (!attributeExports.empty()) + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); + exportPositions(positionExports); + } else { + exportPositions(positionExports); + exportAttributes(attributeExports); + } m_builder.CreateBr(endExportVertexBlock); } @@ -1955,6 +2100,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { // if (threadIdInWave < primCountInWave) // Run GS // + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect XFB exports + // // if (Enable SW XFB) // Process SW XFB // else if (Enable primitive statistics counting) @@ -1986,14 +2134,17 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { // Send GS_ALLOC_REQ message // Barrier // + // if (threadIdInSubgroup < vertCountInSubgroup) + // Collect vertex exports + // // if (threadIdInSubgroup < primCountInSubgroup) // Export primitive // // if (threadIdInSubgroup < vertCountInSubgroup) { // if (Needn't compact vertex && empty wave) // Dummy vertex export - // else - // Run copy shader (export vertex) + // else if (drawFlag) + // Export vertex // } // } // @@ -2010,6 +2161,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { auto beginGsBlock = createBlock(primShader, ".beginGs"); auto endGsBlock = createBlock(primShader, ".endGs"); + auto collectXfbExportBlock = createBlock(primShader, ".collectXfbExport"); + auto endCollectXfbExportBlock = createBlock(primShader, ".endCollectXfbExport"); + BasicBlock *initVertexCountsBlock = nullptr; BasicBlock *endInitVertexCountsBlock = nullptr; @@ -2029,6 +2183,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { BasicBlock *sendGsAllocReqBlock = nullptr; BasicBlock *endSendGsAllocReqBlock = nullptr; + BasicBlock *collectVertexExportBlock = nullptr; + BasicBlock *endCollectVertexExportBlock = nullptr; + 
BasicBlock *exportPrimitiveBlock = nullptr; BasicBlock *endExportPrimitiveBlock = nullptr; @@ -2064,6 +2221,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { sendGsAllocReqBlock = createBlock(primShader, ".sendGsAllocReq"); endSendGsAllocReqBlock = createBlock(primShader, ".endSendGsAllocReq"); + collectVertexExportBlock = createBlock(primShader, ".collectVertexExport"); + endCollectVertexExportBlock = createBlock(primShader, ".endCollectVertexExport"); + exportPrimitiveBlock = createBlock(primShader, ".exportPrimitive"); endExportPrimitiveBlock = createBlock(primShader, ".endExportPrimitive"); @@ -2150,11 +2310,31 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { { m_builder.SetInsertPoint(endGsBlock); - // Process SW XFB or primitive statistics counting + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + m_builder.CreateCondBr(validVertex, collectXfbExportBlock, endCollectXfbExportBlock); + } + + // Construct ".collectXfbExport" block + SmallVector xfbExports; + { + m_builder.SetInsertPoint(collectXfbExportBlock); + if (m_pipelineState->enableSwXfb()) - processSwXfbWithGs(args); - else if (m_pipelineState->enablePrimStats()) + collectExports(args, m_gsHandlers.copyShader, true, nullptr, nullptr, &xfbExports); + + m_builder.CreateBr(endCollectXfbExportBlock); + } + + // Construct ".endCollectXfbExport" block + { + m_builder.SetInsertPoint(endCollectXfbExportBlock); + + if (m_pipelineState->enableSwXfb()) { + createPhiForExports(nullptr, nullptr, &xfbExports); + processSwXfbWithGs(args, xfbExports); + } else if (m_pipelineState->enablePrimStats()) { collectPrimitiveStats(); + } if (noRasterization) { auto firstWaveInSubgroup = m_builder.CreateICmpEQ(m_nggInputs.waveIdInSubgroup, m_builder.getInt32(0)); @@ -2460,6 +2640,30 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { { m_builder.SetInsertPoint(endSendGsAllocReqBlock); + auto validVertex = 
m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + if (!m_nggControl->compactVertex) + validVertex = m_builder.CreateAnd(validVertex, drawFlag); // Culled vertices will not be drawn + m_builder.CreateCondBr(validVertex, collectVertexExportBlock, endCollectVertexExportBlock); + } + + // Construct ".collectVertexExport" block + SmallVector positionExports; + SmallVector attributeExports; + { + m_builder.SetInsertPoint(collectVertexExportBlock); + + mutateCopyShader(); + collectExports(args, m_gsHandlers.copyShader, false, &positionExports, &attributeExports, nullptr); + + m_builder.CreateBr(endCollectVertexExportBlock); + } + + // Construct ".endCollectVertexExport" block + { + m_builder.SetInsertPoint(endCollectVertexExportBlock); + + createPhiForExports(&positionExports, &attributeExports, nullptr); + auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.primCountInSubgroup); m_builder.CreateCondBr(validPrimitive, exportPrimitiveBlock, endExportPrimitiveBlock); } @@ -2529,9 +2733,9 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { { m_builder.SetInsertPoint(checkExportVertexBlock); - auto validVertex = m_nggControl->compactVertex - ?
m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup) - : drawFlag; + auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); + if (!m_nggControl->compactVertex) + validVertex = m_builder.CreateAnd(validVertex, drawFlag); // Culled vertex vertices will not be drawn m_builder.CreateCondBr(validVertex, exportVertexBlock, endExportVertexBlock); } @@ -2539,7 +2743,17 @@ void NggPrimShader::buildPrimShaderWithGs(Function *primShader) { { m_builder.SetInsertPoint(exportVertexBlock); - runCopyShader(args); + // NOTE: If the workaround of attributes-through-memory preceding vertex position data is required, we have to + // place vertex exports after all attribute exports (ATM operations). + if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos) { + exportAttributes(attributeExports); + if (!attributeExports.empty()) + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); + exportPositions(positionExports); + } else { + exportPositions(positionExports); + exportAttributes(attributeExports); + } m_builder.CreateBr(endExportVertexBlock); } @@ -2719,7 +2933,7 @@ void NggPrimShader::loadStreamOutBufferInfo(Value *userData) { for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { bool bufferActive = xfbStrides[i] > 0; if (!bufferActive) - continue; // Transform feedback buffer inactive + continue; // XFB buffer inactive // Get stream-out buffer descriptors and record them m_streamOutBufDescs[i] = readValueFromCb(FixedVectorType::get(m_builder.getInt32Ty(), 4), streamOutTablePtr, @@ -3220,24 +3434,15 @@ void NggPrimShader::earlyExitWithDummyExport() { // // @param args : Arguments of primitive shader entry-point void NggPrimShader::runEs(ArrayRef args) { - if (!m_hasTes && !m_hasVs) { - // No TES or VS, don't have to run - return; - } + if (!m_esHandlers.main) + return; // No ES, don't run - if (!m_hasGs) { - // 
For GS, vertex export is done in copy shader - IRBuilder<>::InsertPointGuard guard(m_builder); - mutateToExportVertex(m_esHandlers.main); - } + assert(m_hasGs); // GS must be present, ES is run as part of ES-GS merged shader - Value *esGsOffset = nullptr; - if (m_hasGs) { - auto &hwConfig = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig; - unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); - esGsOffset = - m_builder.CreateMul(m_nggInputs.waveIdInSubgroup, m_builder.getInt32(waveSize * hwConfig.esGsRingItemSize)); - } + auto &hwConfig = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig; + unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Geometry); + auto esGsOffset = + m_builder.CreateMul(m_nggInputs.waveIdInSubgroup, m_builder.getInt32(waveSize * hwConfig.esGsRingItemSize)); Value *offChipLdsBase = args[ShaderMerger::getSpecialSgprInputIndex(m_gfxIp, EsGs::OffChipLdsBase)]; offChipLdsBase->setName("offChipLdsBase"); @@ -3273,11 +3478,6 @@ void NggPrimShader::runEs(ArrayRef args) { SmallVector esArgs; - if (!m_pipelineState->exportAttributeByExportInstruction()) { - if (!m_hasGs) // For GS, ATM is in copy shader - appendAttributeThroughMemoryArguments(esArgs); - } - // Set up user data SGPRs const unsigned userDataCount = m_pipelineState->getShaderInterfaceData(m_hasTes ? 
ShaderStage::TessEval : ShaderStage::Vertex)->userDataCount; @@ -3286,9 +3486,7 @@ void NggPrimShader::runEs(ArrayRef args) { if (m_hasTes) { // Set up system value SGPRs esArgs.push_back(offChipLdsBase); - - if (m_hasGs) - esArgs.push_back(esGsOffset); + esArgs.push_back(esGsOffset); // Set up system value VGPRs esArgs.push_back(tessCoordX); @@ -3297,8 +3495,7 @@ void NggPrimShader::runEs(ArrayRef args) { esArgs.push_back(patchId); } else { // Set up system value SGPRs - if (m_hasGs) - esArgs.push_back(esGsOffset); + esArgs.push_back(esGsOffset); // Set up system value VGPRs esArgs.push_back(vertexId); @@ -3314,203 +3511,40 @@ void NggPrimShader::runEs(ArrayRef args) { } // ===================================================================================================================== -// Runs part ES. Before doing this, ES must have been already split to two parts: one is to fetch cull data for -// NGG culling; the other is to do deferred vertex export. +// Split ES to two parts. The first part only contains cull data exports, such as position and cull distance (if cull +// distance culling is enabled). The second part contains other remaining exports that are not in the first part. 
// -// @param args : Arguments of primitive shader entry-point -// @param position : Vertex position data (if provided, the part ES is to do deferred vertex export) -Value *NggPrimShader::runPartEs(ArrayRef args, Value *position) { - assert(m_hasGs == false); // GS must not be present - assert(m_nggControl->passthroughMode == false); // NGG culling is enabled - - const bool deferredVertexExport = position != nullptr; - - Value *offChipLdsBase = args[ShaderMerger::getSpecialSgprInputIndex(m_gfxIp, EsGs::OffChipLdsBase)]; - offChipLdsBase->setName("offChipLdsBase"); - - Value *userData = args[NumSpecialSgprInputs]; - - ArrayRef vgprArgs(args.begin() + NumSpecialSgprInputs + 1, args.end()); - - Value *tessCoordX = nullptr; - Value *tessCoordY = nullptr; - Value *relPatchId = nullptr; - Value *patchId = nullptr; - - Value *vertexId = nullptr; - Value *relVertexId = PoisonValue::get(m_builder.getInt32Ty()); // Unused - // NOTE: VS primitive ID for NGG is specially obtained from primitive ID distribution. - Value *vsPrimitiveId = m_distributedPrimitiveId ? 
m_distributedPrimitiveId : PoisonValue::get(m_builder.getInt32Ty()); - Value *instanceId = nullptr; - - if (m_gfxIp.major <= 11) { - if (m_hasTes) { - tessCoordX = vgprArgs[5]; - tessCoordY = vgprArgs[6]; - relPatchId = vgprArgs[7]; - patchId = vgprArgs[8]; - } else { - vertexId = vgprArgs[5]; - instanceId = vgprArgs[8]; - } - } else { - llvm_unreachable("Not implemented!"); - } - - if (deferredVertexExport && m_compactVertex) { - auto exportVertexBlock = m_builder.GetInsertBlock(); - - auto uncompactVertexBlock = createBlock(exportVertexBlock->getParent(), ".uncompactVertex"); - uncompactVertexBlock->moveAfter(exportVertexBlock); - - auto endUncompactVertexBlock = createBlock(exportVertexBlock->getParent(), ".endUncompactVertex"); - endUncompactVertexBlock->moveAfter(uncompactVertexBlock); - - m_builder.CreateCondBr(m_compactVertex, uncompactVertexBlock, endUncompactVertexBlock); - - // Construct ".uncompactVertex" block - Value *newPosition = nullptr; - Value *newTessCoordX = nullptr; - Value *newTessCoordY = nullptr; - Value *newRelPatchId = nullptr; - Value *newPatchId = nullptr; - Value *newVertexId = nullptr; - Value *newVsPrimitiveId = nullptr; - Value *newInstanceId = nullptr; - { - m_builder.SetInsertPoint(uncompactVertexBlock); - - const unsigned esGsRingItemSize = - m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig.esGsRingItemSize; - - auto uncompactedVertexIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::VertexIndexMap); - auto vertexItemOffset = m_builder.CreateMul(uncompactedVertexIndex, m_builder.getInt32(esGsRingItemSize)); - - newPosition = readPerThreadDataFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), uncompactedVertexIndex, - PrimShaderLdsRegion::VertexPosition, 0, true); - - // NOTE: For deferred vertex export, some system values could be from vertex compaction info rather than from - // VGPRs (caused by NGG culling and vertex 
compaction) - const auto resUsage = - m_pipelineState->getShaderResourceUsage(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex); - if (m_hasTes) { - if (resUsage->builtInUsage.tes.tessCoord) { - newTessCoordX = - readVertexCullInfoFromLds(m_builder.getFloatTy(), vertexItemOffset, m_vertCullInfoOffsets.tessCoordX); - newTessCoordY = - readVertexCullInfoFromLds(m_builder.getFloatTy(), vertexItemOffset, m_vertCullInfoOffsets.tessCoordY); - } - - newRelPatchId = - readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.relPatchId); - - if (resUsage->builtInUsage.tes.primitiveId) { - newPatchId = - readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.patchId); - } - } else { - if (resUsage->builtInUsage.vs.vertexIndex) { - newVertexId = - readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.vertexId); - } - - // NOTE: Relative vertex index provided by HW is not used when VS is merged to GS. 
- - if (resUsage->builtInUsage.vs.primitiveId) { - newVsPrimitiveId = - readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.primitiveId); - } - - if (resUsage->builtInUsage.vs.instanceIndex) { - newInstanceId = - readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.instanceId); - } - } - m_builder.CreateBr(endUncompactVertexBlock); - } - - // Construct ".endUncompactVertex" block - { - m_builder.SetInsertPoint(endUncompactVertexBlock); - - position = createPhi({{newPosition, uncompactVertexBlock}, {position, exportVertexBlock}}); - - if (m_hasTes) { - if (newTessCoordX) - tessCoordX = createPhi({{newTessCoordX, uncompactVertexBlock}, {tessCoordX, exportVertexBlock}}); - - if (newTessCoordY) - tessCoordY = createPhi({{newTessCoordY, uncompactVertexBlock}, {tessCoordY, exportVertexBlock}}); - - assert(newRelPatchId); - relPatchId = createPhi({{newRelPatchId, uncompactVertexBlock}, {relPatchId, exportVertexBlock}}); - - if (newPatchId) - patchId = createPhi({{newPatchId, uncompactVertexBlock}, {patchId, exportVertexBlock}}); - } else { - if (newVertexId) - vertexId = createPhi({{newVertexId, uncompactVertexBlock}, {vertexId, exportVertexBlock}}); - - if (newVsPrimitiveId) - vsPrimitiveId = createPhi({{newVsPrimitiveId, uncompactVertexBlock}, {vsPrimitiveId, exportVertexBlock}}); - - if (newInstanceId) - instanceId = createPhi({{newInstanceId, uncompactVertexBlock}, {instanceId, exportVertexBlock}}); - } - } - } - - auto partEs = deferredVertexExport ? m_esHandlers.vertexExporter : m_esHandlers.cullDataFetcher; - - SmallVector partEsArgs; +// NOTE: After this splitting, original ES is removed and couldn't be used any more. 
+void NggPrimShader::splitEs() { + assert(m_hasGs == false); // GS must not be present - if (!m_pipelineState->exportAttributeByExportInstruction() && deferredVertexExport) - appendAttributeThroughMemoryArguments(partEsArgs); + SmallVector callsToRemove; - if (deferredVertexExport) - partEsArgs.push_back(position); // Setup vertex position data as the additional argument + // + // Create ES first part (only contains cull data exports) + // - // Set up user data SGPRs - const unsigned userDataCount = - m_pipelineState->getShaderInterfaceData(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)->userDataCount; - appendUserData(partEsArgs, partEs, userData, userDataCount); + // Clone ES + auto esFirstPartTy = FunctionType::get(m_builder.getVoidTy(), m_esHandlers.main->getFunctionType()->params(), false); + auto esFirstPart = + Function::Create(esFirstPartTy, GlobalVariable::InternalLinkage, "", m_esHandlers.main->getParent()); - if (m_hasTes) { - // Set up system value SGPRs - partEsArgs.push_back(offChipLdsBase); + ValueToValueMapTy valueMap; - // Set up system value VGPRs - partEsArgs.push_back(tessCoordX); - partEsArgs.push_back(tessCoordY); - partEsArgs.push_back(relPatchId); - partEsArgs.push_back(patchId); - } else { - // Set up system value VGPRs - partEsArgs.push_back(vertexId); - partEsArgs.push_back(relVertexId); - partEsArgs.push_back(vsPrimitiveId); - partEsArgs.push_back(instanceId); - } + Argument *newArg = esFirstPart->arg_begin(); + for (Argument &arg : m_esHandlers.main->args()) + valueMap[&arg] = newArg++; - assert(partEsArgs.size() == partEs->arg_size()); // Must have visit all arguments of the part ES + SmallVector retInsts; + CloneFunctionInto(esFirstPart, m_esHandlers.main, valueMap, CloneFunctionChangeType::LocalChangesOnly, retInsts); - CallInst *partEsCall = callFunctionHelper(partEs, partEsArgs, m_builder.GetInsertBlock()); - partEsCall->setCallingConv(CallingConv::AMDGPU_ES); - return partEsCall; -} + 
esFirstPart->setDLLStorageClass(GlobalValue::DefaultStorageClass); + esFirstPart->setCallingConv(CallingConv::C); + esFirstPart->addFnAttr(Attribute::AlwaysInline); -// ===================================================================================================================== -// Split ES to two parts. One is to fetch cull data for NGG culling, such as position and cull distance (if cull -// distance culling is enabled). The other is to do deferred vertex export like original ES. -// -// NOTE: After this splitting, original ES is removed and couldn't be used any more. -void NggPrimShader::splitEs() { - assert(m_hasGs == false); // GS must not be present + esFirstPart->setName(NggEsFirstPart); - // - // Preparation for fetching cull distances - // + // Mutate ES first part by only keeping cull data exports unsigned clipCullExportSlot = 1; unsigned clipDistanceCount = 0; unsigned cullDistanceCount = 0; @@ -3522,15 +3556,15 @@ void NggPrimShader::splitEs() { if (m_hasTes) { const auto &builtInUsage = resUsage->builtInUsage.tes; - bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex; + const bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex; clipCullExportSlot = miscExport ? 2 : 1; clipDistanceCount = builtInUsage.clipDistance; cullDistanceCount = builtInUsage.cullDistance; } else { const auto &builtInUsage = resUsage->builtInUsage.vs; - bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex; - miscExport |= builtInUsage.primitiveShadingRate; + const bool miscExport = builtInUsage.pointSize || builtInUsage.layer || builtInUsage.viewportIndex || + builtInUsage.primitiveShadingRate; clipCullExportSlot = miscExport ? 
2 : 1; clipDistanceCount = builtInUsage.clipDistance; cullDistanceCount = builtInUsage.cullDistance; @@ -3539,174 +3573,83 @@ void NggPrimShader::splitEs() { assert(cullDistanceCount > 0); // Cull distance must exist if the culling is enabled } - // - // Create the part ES to fetch cull data for NGG culling - // - const auto positionTy = FixedVectorType::get(m_builder.getFloatTy(), 4); - const auto cullDistanceTy = ArrayType::get(m_builder.getFloatTy(), cullDistanceCount); - - Type *cullDataTy = positionTy; - if (m_nggControl->enableCullDistanceCulling) - cullDataTy = StructType::get(m_builder.getContext(), {positionTy, cullDistanceTy}); - - // Clone ES - auto esCullDataFetcherTy = FunctionType::get(cullDataTy, m_esHandlers.main->getFunctionType()->params(), false); - auto esCullDataFetcher = - Function::Create(esCullDataFetcherTy, m_esHandlers.main->getLinkage(), "", m_esHandlers.main->getParent()); - - ValueToValueMapTy valueMap; - - Argument *newArg = esCullDataFetcher->arg_begin(); - for (Argument &arg : m_esHandlers.main->args()) - valueMap[&arg] = newArg++; - - SmallVector retInsts; - CloneFunctionInto(esCullDataFetcher, m_esHandlers.main, valueMap, CloneFunctionChangeType::LocalChangesOnly, - retInsts); - esCullDataFetcher->setName(NggEsCullDataFetcher); - - // Find the return block, remove all exports, and mutate return type - BasicBlock *retBlock = nullptr; - for (BasicBlock &block : *esCullDataFetcher) { - auto retInst = dyn_cast(block.getTerminator()); - if (retInst) { - retInst->dropAllReferences(); - retInst->eraseFromParent(); - - retBlock = █ - break; - } - } - assert(retBlock); - - IRBuilder<>::InsertPointGuard guard(m_builder); - m_builder.SetInsertPoint(retBlock); - - SmallVector callsToRemove; - - // Fetch position and cull distances - Value *position = PoisonValue::get(positionTy); - SmallVector clipCullDistance(MaxClipCullDistanceCount); - { struct Payload { NggPrimShader &self; const unsigned clipCullExportSlot; const unsigned 
clipDistanceCount; const unsigned cullDistanceCount; - Value *&position; - SmallVectorImpl &clipCullDistance; SmallVectorImpl &callsToRemove; }; - Payload payload = {*this, clipCullExportSlot, clipDistanceCount, cullDistanceCount, - position, clipCullDistance, callsToRemove}; + Payload payload = {*this, clipCullExportSlot, clipDistanceCount, cullDistanceCount, callsToRemove}; static const auto visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { - auto &builder = payload.self.m_builder; - + bool keepExport = false; auto exportSlot = exportPositionOp.getExportSlot(); if (exportSlot == 0) { - // Get position value - payload.self.m_constPositionZ = isa(exportPositionOp.getExportValue2()); - payload.position = builder.CreateInsertElement(payload.position, exportPositionOp.getExportValue0(), - static_cast(0)); - payload.position = builder.CreateInsertElement(payload.position, exportPositionOp.getExportValue1(), 1); - payload.position = builder.CreateInsertElement(payload.position, exportPositionOp.getExportValue2(), 2); - payload.position = builder.CreateInsertElement(payload.position, exportPositionOp.getExportValue3(), 3); - } else if (exportSlot == payload.clipCullExportSlot) { - // Get clip/cull distance value + keepExport = true; // Position0 + } else { if (payload.self.m_nggControl->enableCullDistanceCulling) { - payload.clipCullDistance[0] = exportPositionOp.getExportValue0(); - payload.clipCullDistance[1] = exportPositionOp.getExportValue1(); - payload.clipCullDistance[2] = exportPositionOp.getExportValue2(); - payload.clipCullDistance[3] = exportPositionOp.getExportValue3(); - } - } else if (exportSlot == payload.clipCullExportSlot + 1 && - payload.clipDistanceCount + payload.cullDistanceCount > 4) { - // Get clip/cull distance value - if (payload.self.m_nggControl->enableCullDistanceCulling) { - payload.clipCullDistance[4] = 
exportPositionOp.getExportValue0(); - payload.clipCullDistance[5] = exportPositionOp.getExportValue1(); - payload.clipCullDistance[6] = exportPositionOp.getExportValue2(); - payload.clipCullDistance[7] = exportPositionOp.getExportValue3(); + if (exportSlot == payload.clipCullExportSlot || + (exportSlot == payload.clipCullExportSlot + 1 && + payload.clipDistanceCount + payload.cullDistanceCount > 4)) { + keepExport = true; // CullDistance + } } } - payload.callsToRemove.push_back(&exportPositionOp); + if (!keepExport) + payload.callsToRemove.push_back(&exportPositionOp); }) .add([](Payload &payload, NggExportAttributeOp &exportAttributeOp) { payload.callsToRemove.push_back(&exportAttributeOp); }) + .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { + payload.callsToRemove.push_back(&writeXfbOutputOp); + }) .build(); - visitor.visit(payload, *esCullDataFetcher); + visitor.visit(payload, *esFirstPart); } - Value *cullData = position; - if (m_nggControl->enableCullDistanceCulling) { - Value *cullDistance = PoisonValue::get(cullDistanceTy); - - for (unsigned i = 0; i < cullDistanceCount; ++i) - cullDistance = m_builder.CreateInsertValue(cullDistance, clipCullDistance[clipDistanceCount + i], i); - - cullData = m_builder.CreateInsertValue(PoisonValue::get(cullDataTy), position, 0); - cullData = m_builder.CreateInsertValue(cullData, cullDistance, 1); - } - - m_builder.CreateRet(cullData); - // - // Create the part ES to do deferred vertex export after NGG culling + // Create ES second part (contains other remaining exports) // + auto esSecondPart = m_esHandlers.main; - // NOTE: Here, we just mutate original ES to do deferred vertex export. We add vertex position data as an additional - // argument. This could avoid re-fetching it since we already get the data before NGG culling. 
- auto esVertexExporter = - addFunctionArgs(m_esHandlers.main, nullptr, {positionTy}, {"position"}, 0, AddFunctionArgsMaybeUndef); - esVertexExporter->setName(NggEsVertexExporter); + esSecondPart->setDLLStorageClass(GlobalValue::DefaultStorageClass); + esSecondPart->setLinkage(GlobalValue::InternalLinkage); + esSecondPart->setCallingConv(CallingConv::C); + esSecondPart->addFnAttr(Attribute::AlwaysInline); - position = esVertexExporter->getArg(0); // The first argument is vertex position data - assert(position->getType() == positionTy); + esSecondPart->setName(NggEsSecondPart); + // Mutate ES second part by keeping the other remaining exports { struct Payload { - NggPrimShader &self; - Value *position; + SmallVectorImpl &callsToRemove; }; - Payload payload = {*this, position}; + Payload payload = {callsToRemove}; static const auto visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { - auto &builder = payload.self.m_builder; - builder.SetInsertPoint(&exportPositionOp); - - if (exportPositionOp.getExportSlot() == 0) { - // Replace vertex position data - exportPositionOp.setExportValue0( - builder.CreateExtractElement(payload.position, static_cast(0))); - exportPositionOp.setExportValue1(builder.CreateExtractElement(payload.position, 1)); - exportPositionOp.setExportValue2(builder.CreateExtractElement(payload.position, 2)); - exportPositionOp.setExportValue3(builder.CreateExtractElement(payload.position, 3)); - } + if (exportPositionOp.getExportSlot() == 0) + payload.callsToRemove.push_back(&exportPositionOp); // Position0 + }) + .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { + payload.callsToRemove.push_back(&writeXfbOutputOp); }) .build(); - visitor.visit(payload, *esVertexExporter); + visitor.visit(payload, *esSecondPart); } - mutateToExportVertex(esVertexExporter); - - // Remove original ES since it is no longer needed - 
assert(m_esHandlers.main->use_empty()); - m_esHandlers.main->eraseFromParent(); + // Remove original ES main function m_esHandlers.main = nullptr; - - // Record new part ES - m_esHandlers.cullDataFetcher = esCullDataFetcher; - m_esHandlers.vertexExporter = esVertexExporter; + m_esHandlers.part = std::make_pair(esFirstPart, esSecondPart); for (auto call : callsToRemove) { call->dropAllReferences(); @@ -3719,6 +3662,7 @@ void NggPrimShader::splitEs() { // // @param args : Arguments of primitive shader entry-point void NggPrimShader::runGs(ArrayRef args) { + assert(m_gsHandlers.main); assert(m_hasGs); // GS must be present mutateGs(); @@ -3901,87 +3845,25 @@ void NggPrimShader::mutateGs() { } // ===================================================================================================================== -// Runs copy shader. -// -// @param args : Arguments of primitive shader entry-point -void NggPrimShader::runCopyShader(ArrayRef args) { +// Mutates copy shader to handle the reading GS outputs from GS-VS ring and remove already-handled XFB exports. 
+void NggPrimShader::mutateCopyShader() { assert(m_hasGs); // GS must be present - // - // The processing is something like this: - // - // vertexIndices = Relative vertex indices - // if (compactVertex) - // vertexIndices = Read uncompacted relative vertex indices from LDS - // Calculate vertex offset and run copy shader - // - Value *vertexIndex = m_nggInputs.threadIdInSubgroup; - if (m_compactVertex) { - auto exportVertexBlock = m_builder.GetInsertBlock(); - - auto uncompactVertexIndexBlock = createBlock(exportVertexBlock->getParent(), ".uncompactVertexIndex"); - uncompactVertexIndexBlock->moveAfter(exportVertexBlock); - - auto endUncompactVertexIndexBlock = createBlock(exportVertexBlock->getParent(), ".endUncompactVertexIndex"); - endUncompactVertexIndexBlock->moveAfter(uncompactVertexIndexBlock); - - m_builder.CreateCondBr(m_compactVertex, uncompactVertexIndexBlock, endUncompactVertexIndexBlock); - - // Construct ".uncompactVertexIndex" block - Value *uncompactedVertexIndex = nullptr; - { - m_builder.SetInsertPoint(uncompactVertexIndexBlock); - - uncompactedVertexIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, - PrimShaderLdsRegion::VertexIndexMap); - - m_builder.CreateBr(endUncompactVertexIndexBlock); - } - - // Construct ".endUncompactVertexIndex" block - { - m_builder.SetInsertPoint(endUncompactVertexIndexBlock); - - vertexIndex = createPhi({{uncompactedVertexIndex, uncompactVertexIndexBlock}, {vertexIndex, exportVertexBlock}}); - } - } - - mutateCopyShader(); - - // Run copy shader - SmallVector copyShaderArgs; - - if (m_gfxIp.major >= 11) { - if (!m_pipelineState->exportAttributeByExportInstruction()) - appendAttributeThroughMemoryArguments(copyShaderArgs); - } - - // Relative vertex index in subgroup (to access GS-VS ring, without vertex compaction) - copyShaderArgs.push_back(vertexIndex); - - CallInst *copyShaderCall = m_builder.CreateCall(m_gsHandlers.copyShader, copyShaderArgs); - 
copyShaderCall->setCallingConv(CallingConv::AMDGPU_VS); -} - -// ===================================================================================================================== -// Mutates copy shader to handle the reading GS outputs from GS-VS ring. -void NggPrimShader::mutateCopyShader() { IRBuilder<>::InsertPointGuard guard(m_builder); - mutateToExportVertex(m_gsHandlers.copyShader); + assert(m_gsHandlers.copyShader->arg_size() == 1); // Only one argument + auto vertexIndex = getFunctionArgument(m_gsHandlers.copyShader, 0); - // Relative vertex index is always the last argument - auto vertexIndex = getFunctionArgument(m_gsHandlers.copyShader, m_gsHandlers.copyShader->arg_size() - 1); const unsigned rasterStream = m_pipelineState->getRasterizerState().rasterStream; assert(rasterStream != InvalidValue); - SmallVector callsToRemove; + SmallVector callsToRemove; struct Payload { NggPrimShader &self; Value *vertexIndex; const unsigned rasterStream; - SmallVectorImpl &callsToRemove; + SmallVectorImpl &callsToRemove; }; Payload payload = {*this, vertexIndex, rasterStream, callsToRemove}; @@ -4005,10 +3887,14 @@ void NggPrimShader::mutateCopyShader() { payload.callsToRemove.push_back(&readGsOutputOp); }) + .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { + payload.callsToRemove.push_back(&writeXfbOutputOp); + }) .build(); visitor.visit(payload, *m_gsHandlers.copyShader); for (auto call : callsToRemove) { + assert(call->user_empty()); call->dropAllReferences(); call->eraseFromParent(); } @@ -6197,141 +6083,302 @@ Value *NggPrimShader::ballot(Value *value) { } // ===================================================================================================================== -// Append additional arguments to the argument list for attribute-through-memory (ATM) of the specified shader stage. 
-// Currently, three arguments are required to do attribute-through-memory: -// (1) Attribute ring buffer descriptor; -// (2) Attribute ring base offset; -// (3) Relative vertex index in NGG subgroup. +// Make an export collector from the specified function by returning the exports of position/attribute/XFB. The input +// collections are to decide which kinds of exports will be collected. // -// @param [in/out] args : The arguments that will be appended to -void NggPrimShader::appendAttributeThroughMemoryArguments(SmallVectorImpl &args) { - assert(m_gfxIp.major >= 11); // For GFX11+ - assert(!m_pipelineState->exportAttributeByExportInstruction()); // ATM is allowed +// @param [in/out] fromFunc : Function from which to make an export collector +// @param makeClone : Whether to make a clone of the specified function +// @param [out] positionExports : Collection of position exports +// @param [out] attributeExports : Collection of attribute exports +// @param [out] xfbExports : Collection of XFB exports +// @returns : Export collector after mutation +Function *NggPrimShader::makeExportCollector(Function *&fromFunc, bool makeClone, + SmallVectorImpl *positionExports, + SmallVectorImpl *attributeExports, + SmallVectorImpl *xfbExports) { + const bool collectPositionExports = positionExports != nullptr; + const bool collectAttributeExports = attributeExports != nullptr; + const bool collectXfbExports = m_pipelineState->enableSwXfb() && xfbExports != nullptr; + assert(collectPositionExports || collectAttributeExports || collectXfbExports); - if (!m_attribRingBufDesc && !m_attribRingBaseOffset) - return; // No ATM, no attributes to export + // + // Count vertex position/attribute/XFB exports. 
+ // + unsigned numPositionExports = 0; + unsigned numAttributeExports = 0; + unsigned numXfbExports = 0; - args.push_back(m_attribRingBufDesc); - args.push_back(m_attribRingBaseOffset); - args.push_back(m_nggInputs.threadIdInSubgroup); -} + { + struct Payload { + const bool collectPositionExports; + const bool collectAttributeExports; + const bool collectXfbExports; + unsigned &numPositionExports; + unsigned &numAttributeExports; + unsigned &numXfbExports; + }; + Payload payload = {collectPositionExports, collectAttributeExports, collectXfbExports, + numPositionExports, numAttributeExports, numXfbExports}; -// ===================================================================================================================== -// Mutate the target function to export vertex (positions and attributes) by lowering position/attribute exporting. If -// attribute through memory (ATM) is required, we mutate its argument list by adding three additional arguments -// (attribute ring buffer descriptor, attribute ring base offset, and relative vertex index in subgroup). 
-// -// @param [in/out] target : Target function to process vertex export -void NggPrimShader::mutateToExportVertex(Function *&target) { - Value *attribRingBufDesc = nullptr; - Value *attribRingBaseOffset = nullptr; - Value *vertexIndex = nullptr; + static const auto visitor = + llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { + if (payload.collectPositionExports) + ++payload.numPositionExports; + }) + .add([](Payload &payload, NggExportAttributeOp &exportAttributeOp) { + if (payload.collectAttributeExports) + ++payload.numAttributeExports; + }) + .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { + if (payload.collectXfbExports) + ++payload.numXfbExports; + }) + .build(); + visitor.visit(payload, *fromFunc); + } - // - // Mutate the argument list of the target function for ATM. - // - if (!m_pipelineState->exportAttributeByExportInstruction()) { - assert(m_gfxIp.major >= 11); // Must be GFX11+ + if (numPositionExports == 0 && numAttributeExports == 0 && numXfbExports == 0) + return nullptr; // No exports to collect - // Could be no ATM - if (m_attribRingBufDesc && m_attribRingBaseOffset) { - // Mutate the argument list by adding two additional arguments - auto newTarget = addFunctionArgs( - target, nullptr, - { - FixedVectorType::get(m_builder.getInt32Ty(), 4), // Attribute ring buffer descriptor (4 SGPRs) - m_builder.getInt32Ty(), // Attribute ring base offset (SGPR) - m_builder.getInt32Ty() // Relative vertex index in subgroup (VGPR) - }, - {"attribRingBufDesc", "attribRingBaseOffset", "vertexIndex"}, 0x3); + auto exportTy = FixedVectorType::get(m_builder.getFloatTy(), 4); - // Original function is no longer needed - assert(target->use_empty()); - target->eraseFromParent(); + ArrayType *positionExportsTy = nullptr; + if (numPositionExports > 0) { + positionExportsTy = ArrayType::get(exportTy, numPositionExports); + 
positionExports->resize(numPositionExports); + } - target = newTarget; + ArrayType *attributeExportsTy = nullptr; + if (numAttributeExports > 0) { + attributeExportsTy = ArrayType::get(exportTy, numAttributeExports); + attributeExports->resize(numAttributeExports); + } - attribRingBufDesc = target->getArg(0); - attribRingBaseOffset = target->getArg(1); - vertexIndex = target->getArg(2); - } + ArrayType *xfbExportsTy = nullptr; + if (numXfbExports > 0) { + xfbExportsTy = ArrayType::get(exportTy, numXfbExports); + xfbExports->resize(numXfbExports); } + SmallVector exportsTy; + if (positionExportsTy) + exportsTy.push_back(positionExportsTy); + if (attributeExportsTy) + exportsTy.push_back(attributeExportsTy); + if (xfbExportsTy) + exportsTy.push_back(xfbExportsTy); + auto returnTy = StructType::get(m_builder.getContext(), exportsTy); + + Function *exportCollector = nullptr; + if (makeClone) { + auto exportCollectorTy = FunctionType::get(returnTy, fromFunc->getFunctionType()->params(), false); + exportCollector = Function::Create(exportCollectorTy, GlobalVariable::InternalLinkage, "", fromFunc->getParent()); + + ValueToValueMapTy valueMap; + + Argument *newArg = exportCollector->arg_begin(); + for (Argument &arg : fromFunc->args()) + valueMap[&arg] = newArg++; + + SmallVector retInsts; + CloneFunctionInto(exportCollector, fromFunc, valueMap, CloneFunctionChangeType::LocalChangesOnly, retInsts); + } else { + exportCollector = addFunctionArgs(fromFunc, returnTy, {}, {}); + + // Original function is no longer needed + assert(fromFunc->use_empty()); + fromFunc->eraseFromParent(); + fromFunc = nullptr; + } + + exportCollector->setDLLStorageClass(GlobalValue::DefaultStorageClass); + exportCollector->setLinkage(GlobalValue::InternalLinkage); + exportCollector->setCallingConv(CallingConv::C); + exportCollector->addFnAttr(Attribute::AlwaysInline); + + std::string postfix = ""; + if (numPositionExports > 0 && numAttributeExports > 0 && numXfbExports > 0) + postfix = ""; // 
Collect all + else if (numPositionExports > 0 && numAttributeExports > 0) + postfix = ".vertex"; // Collect vertex (position and attribute) + else if (numPositionExports > 0) + postfix = ".position"; // Collect position + else if (numAttributeExports > 0) + postfix = ".attribute"; // Collect attribute + else if (numXfbExports > 0) + postfix = ".xfb"; // Collect XFB + else + llvm_unreachable("Unexpected collecting kind"); + + exportCollector->setName(NggExportCollector + postfix); + // - // Collect vertex position/attribute exports. + // Collect vertex position/attribute/XFB exports. // SmallVector exportPositionOps; SmallVector exportAttributeOps; + SmallVector writeXfbOutputOps; - // Collect vertex poistion/attribute exports - struct Payload { - SmallVectorImpl &exportPositionOps; - SmallVectorImpl &exportAttributeOps; - }; - Payload payload = {exportPositionOps, exportAttributeOps}; + { + struct Payload { + SmallVectorImpl &exportPositionOps; + SmallVectorImpl &exportAttributeOps; + SmallVectorImpl &writeXfbOutputOps; + }; + Payload payload = {exportPositionOps, exportAttributeOps, writeXfbOutputOps}; - static const auto visitor = - llvm_dialects::VisitorBuilder() - .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) - .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { - payload.exportPositionOps.push_back(&exportPositionOp); - }) - .add([](Payload &payload, NggExportAttributeOp &exportAttributeOp) { - payload.exportAttributeOps.push_back(&exportAttributeOp); - }) - .build(); - visitor.visit(payload, *target); + static const auto visitor = + llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { + payload.exportPositionOps.push_back(&exportPositionOp); + }) + .add([](Payload &payload, NggExportAttributeOp &exportAttributeOp) { + payload.exportAttributeOps.push_back(&exportAttributeOp); + }) + .add([](Payload &payload, 
WriteXfbOutputOp &writeXfbOutputOp) { + payload.writeXfbOutputOps.push_back(&writeXfbOutputOp); + }) + .build(); + visitor.visit(payload, *exportCollector); + } - // If there are no position/attribute exports, skip further processing - if (exportPositionOps.empty() && exportAttributeOps.empty()) - return; + // + // Construct the return value of the export collector + // + IRBuilder<>::InsertPointGuard guard(m_builder); - assert(!exportPositionOps.empty()); // Position0 export is always present - ReturnInst *retInst = dyn_cast(exportPositionOps[0]->getParent()->getTerminator()); + ReturnInst *retInst = nullptr; + if (!exportPositionOps.empty()) + retInst = dyn_cast(exportPositionOps[0]->getParent()->getTerminator()); + else if (!exportAttributeOps.empty()) + retInst = dyn_cast(exportAttributeOps[0]->getParent()->getTerminator()); + else if (!writeXfbOutputOps.empty()) + retInst = dyn_cast(writeXfbOutputOps[0]->getParent()->getTerminator()); assert(retInst); + m_builder.SetInsertPoint(retInst); + + Value *returnValue = PoisonValue::get(returnTy); + unsigned index = 0; + if (numPositionExports > 0) { + Value *positionExportValues = PoisonValue::get(positionExportsTy); + unsigned i = 0; + + for (auto exportPositionOp : exportPositionOps) { + std::array exportValues = {exportPositionOp->getExportValue0(), exportPositionOp->getExportValue1(), + exportPositionOp->getExportValue2(), exportPositionOp->getExportValue3()}; + unsigned channelMask = 0; + Value *positionExportValue = PoisonValue::get(exportTy); + for (unsigned j = 0; j < 4; ++j) { + if (!isa(exportValues[j]) && !isa(exportValues[j])) + channelMask |= (1u << j); // Update channel mask if the value is valid (not unspecified) + positionExportValue = m_builder.CreateInsertElement(positionExportValue, exportValues[j], j); + } - // - // Reorder vertex position/attribute exports. 
- // - for (auto exportPositionOp : exportPositionOps) - exportPositionOp->moveBefore(retInst); + (*positionExports)[i] = {}; + (*positionExports)[i].exportSlot = exportPositionOp->getExportSlot(); + (*positionExports)[i].channelMask = channelMask; - // NOTE: If the workaround of attributes-through-memory preceding vertex position data is required, we have to - // place vertex exports after all attribute exports (ATM operations). - Instruction *movePoint = retInst; - if (m_pipelineState->getTargetInfo().getGpuWorkarounds().gfx11.waAtmPrecedesPos) { - if (!exportAttributeOps.empty()) { - m_builder.SetInsertPoint(exportPositionOps[0]); - movePoint = - m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); + positionExportValues = m_builder.CreateInsertValue(positionExportValues, positionExportValue, i); + ++i; } + + returnValue = m_builder.CreateInsertValue(returnValue, positionExportValues, index++); } - for (auto exportAttributeOp : exportAttributeOps) - exportAttributeOp->moveBefore(movePoint); + if (numAttributeExports > 0) { + Value *attributeExportValues = PoisonValue::get(attributeExportsTy); + unsigned i = 0; - // - // Lower vertex position/attribute exports. 
- // - for (auto exportPositionOp : exportPositionOps) { - m_builder.SetInsertPoint(exportPositionOp); - const bool lastExport = exportPositionOp == exportPositionOps[exportPositionOps.size() - 1]; - exportPosition(exportPositionOp->getExportSlot(), - {exportPositionOp->getExportValue0(), exportPositionOp->getExportValue1(), - exportPositionOp->getExportValue2(), exportPositionOp->getExportValue3()}, - lastExport); + for (auto exportAttributeOp : exportAttributeOps) { + std::array exportValues = {exportAttributeOp->getExportValue0(), exportAttributeOp->getExportValue1(), + exportAttributeOp->getExportValue2(), + exportAttributeOp->getExportValue3()}; + unsigned channelMask = 0; + Value *attributeExportValue = PoisonValue::get(exportTy); + for (unsigned j = 0; j < 4; ++j) { + if (!isa(exportValues[j]) && !isa(exportValues[j])) + channelMask |= (1u << j); // Update channel mask if the value is valid (not unspecified) + attributeExportValue = m_builder.CreateInsertElement(attributeExportValue, exportValues[j], j); + } + (*attributeExports)[i] = {}; + (*attributeExports)[i].exportSlot = exportAttributeOp->getExportSlot(); + (*attributeExports)[i].channelMask = channelMask; + + attributeExportValues = m_builder.CreateInsertValue(attributeExportValues, attributeExportValue, i); + ++i; + } + + returnValue = m_builder.CreateInsertValue(returnValue, attributeExportValues, index++); } - for (auto exportAttributeOp : exportAttributeOps) { - m_builder.SetInsertPoint(exportAttributeOp); - exportAttribute(exportAttributeOp->getExportSlot(), - {exportAttributeOp->getExportValue0(), exportAttributeOp->getExportValue1(), - exportAttributeOp->getExportValue2(), exportAttributeOp->getExportValue3()}, - attribRingBufDesc, attribRingBaseOffset, vertexIndex); + if (numXfbExports > 0) { + Value *xfbExportValues = PoisonValue::get(xfbExportsTy); + unsigned i = 0; + unsigned offsetInVertex = 0; + + for (auto writeXfbOutputOp : writeXfbOutputOps) { + auto outputValue = 
writeXfbOutputOp->getOutputValue(); + auto outputTy = outputValue->getType(); + assert(outputTy->getScalarSizeInBits() == 32); + unsigned numElements = outputTy->isVectorTy() ? cast(outputTy)->getNumElements() : 1; + assert(numElements <= 4); + + (*xfbExports)[i] = {}; + (*xfbExports)[i].xfbBuffer = writeXfbOutputOp->getXfbBuffer(); + (*xfbExports)[i].xfbOffset = writeXfbOutputOp->getXfbOffset(); + (*xfbExports)[i].numElements = numElements; + + if (m_hasGs) { + // NOTE: For GS, the output value must be loaded by NggReadGsOutputOp. This is generated by copy shader. + NggReadGsOutputOp *readGsOutputOp = dyn_cast(outputValue); + assert(readGsOutputOp->getStreamId() == writeXfbOutputOp->getStreamId()); // Stream IDs must match + + (*xfbExports)[i].locInfo.streamId = writeXfbOutputOp->getStreamId(); + (*xfbExports)[i].locInfo.location = readGsOutputOp->getLocation(); + (*xfbExports)[i].locInfo.component = readGsOutputOp->getComponent(); + + ++i; + continue; + } + + if (outputTy->isIntOrIntVectorTy()) { + if (numElements == 1) { + outputValue = m_builder.CreateBitCast(outputValue, m_builder.getFloatTy()); + } else { + outputValue = m_builder.CreateBitCast(outputValue, FixedVectorType::get(m_builder.getFloatTy(), numElements)); + } + } + + // Always pad the write value to <4 x float> + Value *xfbExportValue = outputValue; + if (numElements == 1) { + xfbExportValue = + m_builder.CreateInsertElement(PoisonValue::get(exportTy), outputValue, static_cast(0)); + } else if (numElements < 4) { + xfbExportValue = m_builder.CreateShuffleVector(outputValue, PoisonValue::get(outputValue->getType()), + ArrayRef({0U, 1U, 2U, 3U})); + } + + (*xfbExports)[i].offsetInVertex = offsetInVertex; + offsetInVertex += numElements; // Increment the offset + + xfbExportValues = m_builder.CreateInsertValue(xfbExportValues, xfbExportValue, i); + ++i; + } + + returnValue = m_builder.CreateInsertValue(returnValue, xfbExportValues, index++); } + m_builder.CreateRet(returnValue); + + // Remove original 
return instruction + retInst->eraseFromParent(); + retInst = nullptr; // - // Remove export dialect ops. + // Clean-up // for (auto exportPositionOp : exportPositionOps) { exportPositionOp->dropAllReferences(); @@ -6342,87 +6389,434 @@ void NggPrimShader::mutateToExportVertex(Function *&target) { exportAttributeOp->dropAllReferences(); exportAttributeOp->eraseFromParent(); } + + for (auto writeXfbOutputOp : writeXfbOutputOps) { + writeXfbOutputOp->dropAllReferences(); + writeXfbOutputOp->eraseFromParent(); + } + + return exportCollector; } // ===================================================================================================================== -// Export vertex position. +// Collect exports from the specified function by making an export collector (either clone it or mutate it), calling it, +// and analyzing its return value. // -// @param exportSlot : Export slot -// @param exportValues : Vertex position values to export -// @param lastExport : Whether this is the last export -void NggPrimShader::exportPosition(unsigned exportSlot, ArrayRef exportValues, bool lastExport) { - assert(exportValues.size() == 4); - - unsigned channelMask = 0; - for (unsigned i = 0; i < 4; ++i) { - assert(exportValues[i]); - if (!isa(exportValues[i]) && !isa(exportValues[i])) - channelMask |= (1u << i); // Update channel mask if the value is valid (not unspecified) - } - - m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp, m_builder.getFloatTy(), - {m_builder.getInt32(EXP_TARGET_POS_0 + exportSlot), // tgt - m_builder.getInt32(channelMask), // en - exportValues[0], // src0 - exportValues[1], // src1 - exportValues[2], // src2 - exportValues[3], // src3 - m_builder.getInt1(lastExport), // done - m_builder.getFalse()}); // vm +// @param args : Arguments of primitive shader entry-point +// @param [in/out] fromFunc : Function from which to collect exports +// @param makeClone : Whether to make a clone of the specified function +// @param [out] positionExports : Collection of 
position exports +// @param [out] attributeExports : Collection of attribute exports +// @param [out] xfbExports : Collection of XFB exports +void NggPrimShader::collectExports(ArrayRef args, Function *&fromFunc, bool makeClone, + SmallVectorImpl *positionExports, + SmallVectorImpl *attributeExports, + SmallVectorImpl *xfbExports) { + const bool collectPositionExports = positionExports != nullptr; + const bool collectAttributeExports = attributeExports != nullptr; + const bool collectXfbExports = m_pipelineState->enableSwXfb() && xfbExports != nullptr; + assert(collectPositionExports || collectAttributeExports || collectXfbExports); + + // + // Mutate the specified function to an export collector + // + auto exportCollector = makeExportCollector(fromFunc, makeClone, positionExports, attributeExports, xfbExports); + if (!exportCollector) + return; // No export to collect + + // + // Run the export collector to collect exports + // + SmallVector exportCollectorArgs; + + if (m_hasGs) { + // The export collector is derived from copy shader + Value *vertexIndex = m_nggInputs.threadIdInSubgroup; + if (m_compactVertex) { + auto collectExportBlock = m_builder.GetInsertBlock(); + + auto uncompactVertexIndexBlock = createBlock(collectExportBlock->getParent(), ".uncompactVertexIndex"); + uncompactVertexIndexBlock->moveAfter(collectExportBlock); + + auto endUncompactVertexIndexBlock = createBlock(collectExportBlock->getParent(), ".endUncompactVertexIndex"); + endUncompactVertexIndexBlock->moveAfter(uncompactVertexIndexBlock); + + m_builder.CreateCondBr(m_compactVertex, uncompactVertexIndexBlock, endUncompactVertexIndexBlock); + + // Construct ".uncompactVertexIndex" block + Value *uncompactedVertexIndex = nullptr; + { + m_builder.SetInsertPoint(uncompactVertexIndexBlock); + + uncompactedVertexIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::VertexIndexMap); + + m_builder.CreateBr(endUncompactVertexIndexBlock); + } 
+ + // Construct ".endUncompactVertexIndex" block + { + m_builder.SetInsertPoint(endUncompactVertexIndexBlock); + + vertexIndex = createPhi({{uncompactedVertexIndex, uncompactVertexIndexBlock}, + {vertexIndex, uncompactVertexIndexBlock->getSinglePredecessor()}}); + } + } + + exportCollectorArgs.push_back(vertexIndex); // Only one argument + } else { + // The export collector is derived from ES + Value *offChipLdsBase = args[ShaderMerger::getSpecialSgprInputIndex(m_gfxIp, EsGs::OffChipLdsBase)]; + offChipLdsBase->setName("offChipLdsBase"); + + Value *userData = args[NumSpecialSgprInputs]; + + ArrayRef vgprArgs(args.begin() + NumSpecialSgprInputs + 1, args.end()); + + Value *tessCoordX = nullptr; + Value *tessCoordY = nullptr; + Value *relPatchId = nullptr; + Value *patchId = nullptr; + + Value *vertexId = nullptr; + Value *relVertexId = PoisonValue::get(m_builder.getInt32Ty()); // Unused + // NOTE: VS primitive ID for NGG is specially obtained from primitive ID distribution. + Value *vsPrimitiveId = + m_distributedPrimitiveId ? 
m_distributedPrimitiveId : PoisonValue::get(m_builder.getInt32Ty()); + Value *instanceId = nullptr; + + if (m_gfxIp.major <= 11) { + if (m_hasTes) { + tessCoordX = vgprArgs[5]; + tessCoordY = vgprArgs[6]; + relPatchId = vgprArgs[7]; + patchId = vgprArgs[8]; + } else { + vertexId = vgprArgs[5]; + instanceId = vgprArgs[8]; + } + } else { + llvm_unreachable("Not implemented!"); + } + + if (m_compactVertex) { + auto collectExportBlock = m_builder.GetInsertBlock(); + + auto uncompactVertexBlock = createBlock(collectExportBlock->getParent(), ".uncompactVertex"); + uncompactVertexBlock->moveAfter(collectExportBlock); + + auto endUncompactVertexBlock = createBlock(collectExportBlock->getParent(), ".endUncompactVertex"); + endUncompactVertexBlock->moveAfter(uncompactVertexBlock); + + m_builder.CreateCondBr(m_compactVertex, uncompactVertexBlock, endUncompactVertexBlock); + + // Construct ".uncompactVertex" block + Value *newTessCoordX = nullptr; + Value *newTessCoordY = nullptr; + Value *newRelPatchId = nullptr; + Value *newPatchId = nullptr; + Value *newVertexId = nullptr; + Value *newVsPrimitiveId = nullptr; + Value *newInstanceId = nullptr; + { + m_builder.SetInsertPoint(uncompactVertexBlock); + + const unsigned esGsRingItemSize = + m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.hwConfig.esGsRingItemSize; + + auto uncompactedVertexIndex = readPerThreadDataFromLds(m_builder.getInt32Ty(), m_nggInputs.threadIdInSubgroup, + PrimShaderLdsRegion::VertexIndexMap); + auto vertexItemOffset = m_builder.CreateMul(uncompactedVertexIndex, m_builder.getInt32(esGsRingItemSize)); + + // NOTE: If vertex compaction, some system values could be from vertex compaction info rather than from VGPRs + // (caused by NGG culling and vertex compaction) + const auto resUsage = + m_pipelineState->getShaderResourceUsage(m_hasTes ? 
ShaderStage::TessEval : ShaderStage::Vertex); + if (m_hasTes) { + if (resUsage->builtInUsage.tes.tessCoord) { + newTessCoordX = + readVertexCullInfoFromLds(m_builder.getFloatTy(), vertexItemOffset, m_vertCullInfoOffsets.tessCoordX); + newTessCoordY = + readVertexCullInfoFromLds(m_builder.getFloatTy(), vertexItemOffset, m_vertCullInfoOffsets.tessCoordY); + } + + newRelPatchId = + readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.relPatchId); + + if (resUsage->builtInUsage.tes.primitiveId) { + newPatchId = + readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.patchId); + } + } else { + if (resUsage->builtInUsage.vs.vertexIndex) { + newVertexId = + readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.vertexId); + } + + // NOTE: Relative vertex index provided by HW is not used when VS is merged to GS. + + if (resUsage->builtInUsage.vs.primitiveId) { + newVsPrimitiveId = + readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.primitiveId); + } + + if (resUsage->builtInUsage.vs.instanceIndex) { + newInstanceId = + readVertexCullInfoFromLds(m_builder.getInt32Ty(), vertexItemOffset, m_vertCullInfoOffsets.instanceId); + } + } + m_builder.CreateBr(endUncompactVertexBlock); + } + + // Construct ".endUncompactVertex" block + { + m_builder.SetInsertPoint(endUncompactVertexBlock); + + if (m_hasTes) { + if (newTessCoordX) + tessCoordX = createPhi( + {{newTessCoordX, uncompactVertexBlock}, {tessCoordX, uncompactVertexBlock->getSinglePredecessor()}}); + + if (newTessCoordY) + tessCoordY = createPhi( + {{newTessCoordY, uncompactVertexBlock}, {tessCoordY, uncompactVertexBlock->getSinglePredecessor()}}); + + assert(newRelPatchId); + relPatchId = createPhi( + {{newRelPatchId, uncompactVertexBlock}, {relPatchId, uncompactVertexBlock->getSinglePredecessor()}}); + + if (newPatchId) + patchId = createPhi( + {{newPatchId, 
uncompactVertexBlock}, {patchId, uncompactVertexBlock->getSinglePredecessor()}}); + } else { + if (newVertexId) + vertexId = createPhi( + {{newVertexId, uncompactVertexBlock}, {vertexId, uncompactVertexBlock->getSinglePredecessor()}}); + + if (newVsPrimitiveId) + vsPrimitiveId = createPhi({{newVsPrimitiveId, uncompactVertexBlock}, + {vsPrimitiveId, uncompactVertexBlock->getSinglePredecessor()}}); + + if (newInstanceId) + instanceId = createPhi( + {{newInstanceId, uncompactVertexBlock}, {instanceId, uncompactVertexBlock->getSinglePredecessor()}}); + } + } + } + + // Set up user data SGPRs + const unsigned userDataCount = + m_pipelineState->getShaderInterfaceData(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)->userDataCount; + appendUserData(exportCollectorArgs, exportCollector, userData, userDataCount); + + if (m_hasTes) { + // Set up system value SGPRs + exportCollectorArgs.push_back(offChipLdsBase); + + // Set up system value VGPRs + exportCollectorArgs.push_back(tessCoordX); + exportCollectorArgs.push_back(tessCoordY); + exportCollectorArgs.push_back(relPatchId); + exportCollectorArgs.push_back(patchId); + } else { + // Set up system value VGPRs + exportCollectorArgs.push_back(vertexId); + exportCollectorArgs.push_back(relVertexId); + exportCollectorArgs.push_back(vsPrimitiveId); + exportCollectorArgs.push_back(instanceId); + } + } + + assert(exportCollectorArgs.size() == exportCollector->arg_size()); // Must have visit all arguments of export reader + auto returnValue = callFunctionHelper(exportCollector, exportCollectorArgs, m_builder.GetInsertBlock()); + + // + // Analyze the return value to extract export values + // + unsigned index = 0; + + const unsigned numPositionExports = collectPositionExports ? positionExports->size() : 0; + const unsigned numAttributeExports = collectAttributeExports ? attributeExports->size() : 0; + const unsigned numXfbExports = collectXfbExports ? 
xfbExports->size() : 0; + + if (numPositionExports > 0) { + auto positionExportValues = m_builder.CreateExtractValue(returnValue, index++); + assert(positionExportValues->getType()->isArrayTy()); + assert(numPositionExports == positionExportValues->getType()->getArrayNumElements()); // Sizes must match + + for (unsigned i = 0; i < numPositionExports; ++i) + (*positionExports)[i].exportValue = m_builder.CreateExtractValue(positionExportValues, i); + } + + if (numAttributeExports > 0) { + auto attributeExportValues = m_builder.CreateExtractValue(returnValue, index++); + assert(attributeExportValues->getType()->isArrayTy()); + assert(numAttributeExports == attributeExportValues->getType()->getArrayNumElements()); // Sizes must match + + for (unsigned i = 0; i < numAttributeExports; ++i) + (*attributeExports)[i].exportValue = m_builder.CreateExtractValue(attributeExportValues, i); + } + + if (numXfbExports > 0) { + auto xfbExportValues = m_builder.CreateExtractValue(returnValue, index++); + assert(xfbExportValues->getType()->isArrayTy()); + assert(numXfbExports == xfbExportValues->getType()->getArrayNumElements()); // Sizes must match + + for (unsigned i = 0; i < numXfbExports; ++i) { + auto xfbExportValue = m_builder.CreateExtractValue(xfbExportValues, i); + if ((*xfbExports)[i].numElements == 1) { + (*xfbExports)[i].exportValue = m_builder.CreateExtractElement(xfbExportValue, static_cast(0)); + } else { + SmallVector shuffleMask; + for (unsigned j = 0; j < (*xfbExports)[i].numElements; ++j) + shuffleMask.push_back(j); + (*xfbExports)[i].exportValue = m_builder.CreateShuffleVector(xfbExportValue, xfbExportValue, shuffleMask); + } + } + } } // ===================================================================================================================== -// Export vertex attribute. +// Create PHI node for the export values of the export collections. 
// -// @param exportSlot : Export slot -// @param exportValues : Vertex attribute values to export -// @param attribRingBufDesc : Attribute ring buffer descriptor -// @param attribRingBaseOffset : Subgroup's attribute ring base offset (in bytes) -// @param vertexIndex : Vertex index in subgroup -void NggPrimShader::exportAttribute(unsigned exportSlot, ArrayRef exportValues, Value *attribRingBufDesc, - Value *attribRingBaseOffset, Value *vertexIndex) { - assert(exportValues.size() == 4); - - if (m_pipelineState->exportAttributeByExportInstruction()) { - unsigned channelMask = 0; - for (unsigned i = 0; i < 4; ++i) { - assert(exportValues[i]); - if (!isa(exportValues[i]) && !isa(exportValues[i])) - channelMask |= (1u << i); // Update channel mask if the value is valid (not unspecified) - } - - m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp, m_builder.getFloatTy(), - {m_builder.getInt32(EXP_TARGET_PARAM_0 + exportSlot), // tgt - m_builder.getInt32(channelMask), // en - exportValues[0], // src0 - exportValues[1], // src1 - exportValues[2], // src2 - exportValues[3], // src3 - m_builder.getFalse(), // done - m_builder.getFalse()}); // vm - } else { - auto locationOffset = m_builder.getInt32(exportSlot * SizeOfVec4); +// @param [out] positionExports : Collection of position exports +// @param [out] attributeExports : Collection of attribute exports +// @param [out] xfbExports : Collection of XFB exports +void NggPrimShader::createPhiForExports(SmallVectorImpl *positionExports, + SmallVectorImpl *attributeExports, + SmallVectorImpl *xfbExports) { + auto currentBlock = m_builder.GetInsertBlock(); + + if (positionExports) { + for (auto &positionExport : *positionExports) { + auto &exportValue = positionExport.exportValue; + auto exportBlock = cast(exportValue)->getParent(); + std::string valueName = "position" + std::to_string(positionExport.exportSlot); + + auto exportValuePhi = m_builder.CreatePHI(exportValue->getType(), pred_size(currentBlock), valueName); + for 
(BasicBlock *predBlock : predecessors(currentBlock)) { + exportValuePhi->addIncoming(predBlock == exportBlock ? exportValue : PoisonValue::get(exportValue->getType()), + predBlock); + } + exportValue = exportValuePhi; + } + } - Value *exportValue = PoisonValue::get(FixedVectorType::get(m_builder.getFloatTy(), 4)); // Must be <4 x float> - for (unsigned i = 0; i < 4; ++i) - exportValue = m_builder.CreateInsertElement(exportValue, exportValues[i], i); + if (attributeExports) { + for (auto &attributeExport : *attributeExports) { + auto &exportValue = attributeExport.exportValue; + auto exportBlock = cast(exportValue)->getParent(); + std::string valueName = "attribute" + std::to_string(attributeExport.exportSlot); - CoherentFlag coherent = {}; - if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { - coherent.bits.glc = true; + auto exportValuePhi = m_builder.CreatePHI(exportValue->getType(), pred_size(currentBlock), valueName); + for (BasicBlock *predBlock : predecessors(currentBlock)) { + exportValuePhi->addIncoming(predBlock == exportBlock ? 
exportValue : PoisonValue::get(exportValue->getType()), + predBlock); + } + exportValue = exportValuePhi; } + } - m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_struct_buffer_store, - {exportValue, attribRingBufDesc, vertexIndex, locationOffset, attribRingBaseOffset, - m_builder.getInt32(coherent.u32All)}); + if (xfbExports && m_pipelineState->enableSwXfb()) { + for (auto &xfbExport : *xfbExports) { + auto &exportValue = xfbExport.exportValue; + auto exportBlock = cast(exportValue)->getParent(); + std::string valueName = + "xfb.buffer" + std::to_string(xfbExport.xfbBuffer) + ".offset" + std::to_string(xfbExport.xfbOffset); + switch (xfbExport.numElements) { + case 1: + valueName += ".x"; + break; + case 2: + valueName += ".xy"; + break; + case 3: + valueName += ".xyz"; + break; + case 4: + valueName += ".xyzw"; + break; + default: + llvm_unreachable("Unexpected number of elements"); + break; + } + auto exportValuePhi = m_builder.CreatePHI(exportValue->getType(), pred_size(currentBlock), valueName); + for (BasicBlock *predBlock : predecessors(currentBlock)) { + exportValuePhi->addIncoming(predBlock == exportBlock ? exportValue : PoisonValue::get(exportValue->getType()), + predBlock); + } + exportValue = exportValuePhi; + } } } // ===================================================================================================================== -// Processes SW emulated transform feedback when API GS is not present. +// Export positions. 
+// +// @param positionExports : Input collection of position exports +void NggPrimShader::exportPositions(const SmallVectorImpl &positionExports) { + CallInst *lastExport = nullptr; + for (auto &positionExport : positionExports) { + std::array exportValues; + for (unsigned i = 0; i < 4; ++i) + exportValues[i] = m_builder.CreateExtractElement(positionExport.exportValue, i); + + lastExport = m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp, m_builder.getFloatTy(), + {m_builder.getInt32(EXP_TARGET_POS_0 + positionExport.exportSlot), // tgt + m_builder.getInt32(positionExport.channelMask), // en + exportValues[0], // src0 + exportValues[1], // src1 + exportValues[2], // src2 + exportValues[3], // src3 + m_builder.getFalse(), // done + m_builder.getFalse()}); // vm + } + + if (lastExport) + lastExport->setArgOperand(6, m_builder.getTrue()); // Set Done flag +} + +// ===================================================================================================================== +// Export attributes. 
+// +// @param attributeExports : Input collection of attribute exports +void NggPrimShader::exportAttributes(const SmallVectorImpl &attributeExports) { + for (auto &attributeExport : attributeExports) { + if (m_pipelineState->exportAttributeByExportInstruction()) { + std::array exportValues; + for (unsigned i = 0; i < 4; ++i) + exportValues[i] = m_builder.CreateExtractElement(attributeExport.exportValue, i); + + m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp, m_builder.getFloatTy(), + {m_builder.getInt32(EXP_TARGET_PARAM_0 + attributeExport.exportSlot), // tgt + m_builder.getInt32(attributeExport.channelMask), // en + exportValues[0], // src0 + exportValues[1], // src1 + exportValues[2], // src2 + exportValues[3], // src3 + m_builder.getFalse(), // done + m_builder.getFalse()}); // vm + } else { + auto attributeOffset = m_builder.getInt32(attributeExport.exportSlot * SizeOfVec4); + + CoherentFlag coherent = {}; + if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { + coherent.bits.glc = true; + } + + m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_struct_buffer_store, + {attributeExport.exportValue, m_attribRingBufDesc, m_nggInputs.threadIdInSubgroup, + attributeOffset, m_attribRingBaseOffset, m_builder.getInt32(coherent.u32All)}); + } + } +} + +// ===================================================================================================================== +// Processes SW emulated XFB when API GS is not present. 
// // @param args : Arguments of primitive shader entry-point -void NggPrimShader::processSwXfb(ArrayRef args) { +// @param xfbExports : Input collection of XFB exports +void NggPrimShader::processSwXfb(ArrayRef args, const SmallVectorImpl &xfbExports) { assert(m_pipelineState->enableSwXfb()); assert(!m_hasGs); // API GS is not present @@ -6436,10 +6830,8 @@ void NggPrimShader::processSwXfb(ArrayRef args) { // The processing is something like this: // // NGG_XFB() { - // if (threadIdInSubgroup < vertCountInSubgroup) { - // Mutate/clone ES to fetch XFB outputs - // Write XFB outputs to LDS region - // } + // if (threadIdInSubgroup < vertCountInSubgroup) + // Write XFB to LDS // // Prepare XFB to update its relevant counters // Barrier @@ -6448,75 +6840,56 @@ void NggPrimShader::processSwXfb(ArrayRef args) { // Read primsToWrite and dwordsWritten from XFB statistics info // // if (threadIdInSubgroup < primsToWrite) - // Export XFB outputs to buffer for each vertice of this primitive + // Export XFB to buffer for each vertice of this primitive // } // BasicBlock *xfbEntryBlock = m_builder.GetInsertBlock(); - BasicBlock *fetchXfbOutputBlock = createBlock(xfbEntryBlock->getParent(), ".fetchXfbOutput"); - fetchXfbOutputBlock->moveAfter(xfbEntryBlock); - BasicBlock *endFetchXfbOutputBlock = createBlock(xfbEntryBlock->getParent(), ".endFetchXfbOutput"); - endFetchXfbOutputBlock->moveAfter(fetchXfbOutputBlock); + BasicBlock *writeXfbBlock = createBlock(xfbEntryBlock->getParent(), ".writeXfb"); + writeXfbBlock->moveAfter(xfbEntryBlock); + BasicBlock *endWriteXfbBlock = createBlock(xfbEntryBlock->getParent(), ".endWriteXfb"); + endWriteXfbBlock->moveAfter(writeXfbBlock); unsigned possibleVertsPerPrim = 3; if (isa(m_verticesPerPrimitive)) possibleVertsPerPrim = cast(m_verticesPerPrimitive)->getZExtValue(); - BasicBlock *exportXfbOutputBlock[3] = {}; - auto insertPos = endFetchXfbOutputBlock; + BasicBlock *exportXfbBlock[3] = {}; + auto insertPos = endWriteXfbBlock; for 
(unsigned i = 0; i < possibleVertsPerPrim; ++i) { - exportXfbOutputBlock[i] = createBlock(xfbEntryBlock->getParent(), ".exportXfbOutputInVertex" + std::to_string(i)); - exportXfbOutputBlock[i]->moveAfter(insertPos); - insertPos = exportXfbOutputBlock[i]; + exportXfbBlock[i] = createBlock(xfbEntryBlock->getParent(), ".exportXfbInVertex" + std::to_string(i)); + exportXfbBlock[i]->moveAfter(insertPos); + insertPos = exportXfbBlock[i]; } - BasicBlock *endExportXfbOutputBlock = createBlock(xfbEntryBlock->getParent(), ".endExportXfbOutput"); - endExportXfbOutputBlock->moveAfter(insertPos); + BasicBlock *endExportXfbBlock = createBlock(xfbEntryBlock->getParent(), ".endExportXfb"); + endExportXfbBlock->moveAfter(insertPos); - // Insert branching in current block to process transform feedback export + // Insert branching in current block to process XFB { auto validVertex = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_nggInputs.vertCountInSubgroup); - m_builder.CreateCondBr(validVertex, fetchXfbOutputBlock, endFetchXfbOutputBlock); + m_builder.CreateCondBr(validVertex, writeXfbBlock, endWriteXfbBlock); } - // Construct ".fetchXfbOutput" block - SmallVector xfbOutputExports; + // Construct ".writeXfb" block { - m_builder.SetInsertPoint(fetchXfbOutputBlock); + m_builder.SetInsertPoint(writeXfbBlock); - auto xfbOutputs = fetchXfbOutput(m_esHandlers.main, args, xfbOutputExports); + for (unsigned i = 0; i < xfbExports.size(); ++i) + writeXfbOutputToLds(xfbExports[i].exportValue, m_nggInputs.threadIdInSubgroup, + xfbExports[i].offsetInVertex); // Write XFB to LDS - for (unsigned i = 0; i < xfbOutputExports.size(); ++i) { - const auto &xfbOutputExport = xfbOutputExports[i]; - assert(xfbOutputs->getType()->isArrayTy()); // Must be arrayed - auto outputValue = m_builder.CreateExtractValue(xfbOutputs, i); - - // Extract valid elements from returned transform feedback output - assert(outputValue->getType() == FixedVectorType::get(m_builder.getInt32Ty(), 4)); // Must be <4 
x i32> - if (xfbOutputExport.numElements == 1) { - outputValue = m_builder.CreateExtractElement(outputValue, static_cast(0)); - } else { - SmallVector shuffleMask; - for (unsigned j = 0; j < xfbOutputExport.numElements; ++j) - shuffleMask.push_back(j); - outputValue = m_builder.CreateShuffleVector(outputValue, outputValue, shuffleMask); - } - - // Write transform feedback outputs to LDS region - writeXfbOutputToLds(outputValue, m_nggInputs.threadIdInSubgroup, xfbOutputExport.offsetInVertex); - } - - m_builder.CreateBr(endFetchXfbOutputBlock); + m_builder.CreateBr(endWriteXfbBlock); } - // Construct ".endFetchXfbOutput" block - Value *streamOutOffsets[MaxTransformFeedbackBuffers] = {}; // Stream-out offset to write transform feedback outputs + // Construct ".endWriteXfb" block + Value *streamOutOffsets[MaxTransformFeedbackBuffers] = {}; // Stream-out offset to write XFB outputs { - m_builder.SetInsertPoint(endFetchXfbOutputBlock); + m_builder.SetInsertPoint(endWriteXfbBlock); prepareSwXfb({m_nggInputs.primCountInSubgroup}); - // We are going to read transform feedback statistics info and outputs from LDS and export them to transform + // We are going to read XFB statistics info and outputs from LDS and export them to transform // feedback buffers. Make all values have been written before this. 
createFenceAndBarrier(); @@ -6534,7 +6907,7 @@ void NggPrimShader::processSwXfb(ArrayRef args) { {xfbStatInfo, m_builder.getInt32(MaxTransformFeedbackBuffers)}); auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, numPrimsToWrite); - m_builder.CreateCondBr(validPrimitive, exportXfbOutputBlock[0], endExportXfbOutputBlock); + m_builder.CreateCondBr(validPrimitive, exportXfbBlock[0], endExportXfbBlock); } Value *vertexIndices[3] = {}; @@ -6543,18 +6916,18 @@ void NggPrimShader::processSwXfb(ArrayRef args) { vertexIndices[2] = m_nggInputs.vertexIndex2; for (unsigned i = 0; i < possibleVertsPerPrim; ++i) { - // Construct ".exportXfbOutputInVertex[N]" block - m_builder.SetInsertPoint(exportXfbOutputBlock[i]); + // Construct ".exportXfbInVertex[N]" block + m_builder.SetInsertPoint(exportXfbBlock[i]); - for (unsigned j = 0; j < xfbOutputExports.size(); ++j) { - const auto &xfbOutputExport = xfbOutputExports[j]; - auto outputValue = readXfbOutputFromLds( - xfbOutputExport.numElements > 1 ? FixedVectorType::get(m_builder.getFloatTy(), xfbOutputExport.numElements) - : m_builder.getFloatTy(), - vertexIndices[i], xfbOutputExport.offsetInVertex); + for (unsigned j = 0; j < xfbExports.size(); ++j) { + const auto &xfbExport = xfbExports[j]; + auto exportValue = readXfbOutputFromLds(xfbExport.numElements > 1 + ? 
FixedVectorType::get(m_builder.getFloatTy(), xfbExport.numElements) + : m_builder.getFloatTy(), + vertexIndices[i], xfbExport.offsetInVertex); unsigned format = 0; - switch (xfbOutputExport.numElements) { + switch (xfbExport.numElements) { case 1: format = BUF_FORMAT_32_FLOAT; break; @@ -6581,38 +6954,38 @@ void NggPrimShader::processSwXfb(ArrayRef args) { // vertexOffset = (threadIdInSubgroup * vertsPerPrim + vertexIndex) * xfbStride Value *vertexOffset = m_builder.CreateAdd( m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_verticesPerPrimitive), m_builder.getInt32(i)); - vertexOffset = m_builder.CreateMul(vertexOffset, m_builder.getInt32(xfbStrides[xfbOutputExport.xfbBuffer])); + vertexOffset = m_builder.CreateMul(vertexOffset, m_builder.getInt32(xfbStrides[xfbExport.xfbBuffer])); // xfbOutputOffset = vertexOffset + xfbOffset - Value *xfbOutputOffset = m_builder.CreateAdd(vertexOffset, m_builder.getInt32(xfbOutputExport.xfbOffset)); + Value *xfbOutputOffset = m_builder.CreateAdd(vertexOffset, m_builder.getInt32(xfbExport.xfbOffset)); m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_raw_tbuffer_store, - {outputValue, // vdata - m_streamOutBufDescs[xfbOutputExport.xfbBuffer], // rsrc - xfbOutputOffset, // offset - streamOutOffsets[xfbOutputExport.xfbBuffer], // soffset - m_builder.getInt32(format), // format - m_builder.getInt32(coherent.u32All)}); // auxiliary data + {exportValue, // vdata + m_streamOutBufDescs[xfbExport.xfbBuffer], // rsrc + xfbOutputOffset, // offset + streamOutOffsets[xfbExport.xfbBuffer], // soffset + m_builder.getInt32(format), // format + m_builder.getInt32(coherent.u32All)}); // auxiliary data } if (i == possibleVertsPerPrim - 1) { // Last vertex - m_builder.CreateBr(endExportXfbOutputBlock); + m_builder.CreateBr(endExportXfbBlock); } else { // Not last vertex, check if we need to export outputs of next vertex auto exportNextVertex = m_builder.CreateICmpUGT(m_verticesPerPrimitive, m_builder.getInt32(i + 1)); - 
m_builder.CreateCondBr(exportNextVertex, exportXfbOutputBlock[i + 1], endExportXfbOutputBlock); + m_builder.CreateCondBr(exportNextVertex, exportXfbBlock[i + 1], endExportXfbBlock); } } - // Construct ".endExportXfbOutput" block - { m_builder.SetInsertPoint(endExportXfbOutputBlock); } + // Construct ".endExportXfb" block + { m_builder.SetInsertPoint(endExportXfbBlock); } } // ===================================================================================================================== -// Process SW emulated transform feedback when API GS is present. +// Process SW emulated XFB when API GS is present. // // @param args : Arguments of primitive shader entry-point -void NggPrimShader::processSwXfbWithGs(ArrayRef args) { +void NggPrimShader::processSwXfbWithGs(ArrayRef args, const SmallVectorImpl &xfbExports) { assert(m_pipelineState->enableSwXfb()); assert(m_hasGs); // GS is present @@ -6657,8 +7030,6 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { // Compact primitive index (compacted -> uncompacted) // } // - // Mutate copy shader to fetch XFB outputs - // // Prepare XFB and update its relevant counters // Barrier // @@ -6667,7 +7038,7 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { // // for each vertex stream { // if (threadIdInSubgroup < primsToWrite) - // Export XFB outputs to buffer for each vertice of this primitive + // Export XFB to buffer for each vertice of this primitive // } // } // @@ -6705,22 +7076,21 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { } } - BasicBlock *exportXfbOutputBlock[MaxGsStreams] = {}; - BasicBlock *endExportXfbOutputBlock[MaxGsStreams] = {}; + BasicBlock *exportXfbBlock[MaxGsStreams] = {}; + BasicBlock *endExportXfbBlock[MaxGsStreams] = {}; for (unsigned i = 0; i < MaxGsStreams; ++i) { if (m_pipelineState->isVertexStreamActive(i)) { - exportXfbOutputBlock[i] = createBlock(xfbEntryBlock->getParent(), ".exportXfbOutputInStream" + std::to_string(i)); - 
exportXfbOutputBlock[i]->moveAfter(insertPos); - insertPos = exportXfbOutputBlock[i]; + exportXfbBlock[i] = createBlock(xfbEntryBlock->getParent(), ".exportXfbInStream" + std::to_string(i)); + exportXfbBlock[i]->moveAfter(insertPos); + insertPos = exportXfbBlock[i]; - endExportXfbOutputBlock[i] = - createBlock(xfbEntryBlock->getParent(), ".endExportXfbOutputInStream" + std::to_string(i)); - endExportXfbOutputBlock[i]->moveAfter(insertPos); - insertPos = endExportXfbOutputBlock[i]; + endExportXfbBlock[i] = createBlock(xfbEntryBlock->getParent(), ".endExportXfbInStream" + std::to_string(i)); + endExportXfbBlock[i]->moveAfter(insertPos); + insertPos = endExportXfbBlock[i]; } } - // Insert branching in current block to process transform feedback export + // Insert branching in current block to process XFB { auto validWave = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(m_maxWavesPerSubgroup + 1)); @@ -6845,8 +7215,6 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { endCompactPrimitiveIndexBlock[firstActiveStream]); } - SmallVector xfbOutputExports; - for (unsigned i = 0; i < MaxGsStreams; ++i) { if (!m_pipelineState->isVertexStreamActive(i)) continue; @@ -6878,11 +7246,8 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { { m_builder.SetInsertPoint(endCompactPrimitiveIndexBlock[i]); - if (i == lastActiveStream) { - // Start to fetch transform feedback outputs after we finish compacting primitive index of the last vertex - // stream. - fetchXfbOutput(m_gsHandlers.copyShader, args, xfbOutputExports); - } else { + if (i != lastActiveStream) { + // Start to prepare XFB after we finish compacting primitive index of the last vertex stream. 
unsigned nextActiveStream = i + 1; while (!m_pipelineState->isVertexStreamActive(nextActiveStream)) { ++nextActiveStream; @@ -6895,12 +7260,12 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { } } - Value *streamOutOffsets[MaxTransformFeedbackBuffers] = {}; // Stream-out offset to write transform feedback outputs + Value *streamOutOffsets[MaxTransformFeedbackBuffers] = {}; // Stream-out offset to write XFB outputs Value *numPrimsToWrite[MaxGsStreams] = {}; { prepareSwXfb(primCountInSubgroup); - // We are going to read transform feedback statistics info from LDS. Make sure the info has been written before + // We are going to read XFB statistics info from LDS. Make sure the info has been written before // this. createFenceAndBarrier(); @@ -6924,17 +7289,16 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { } auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, numPrimsToWrite[firstActiveStream]); - m_builder.CreateCondBr(validPrimitive, exportXfbOutputBlock[firstActiveStream], - endExportXfbOutputBlock[firstActiveStream]); + m_builder.CreateCondBr(validPrimitive, exportXfbBlock[firstActiveStream], endExportXfbBlock[firstActiveStream]); } for (unsigned i = 0; i < MaxGsStreams; ++i) { if (!m_pipelineState->isVertexStreamActive(i)) continue; - // Construct ".exportXfbOutputInStream[N]" block + // Construct ".exportXfbInStream[N]" block { - m_builder.SetInsertPoint(exportXfbOutputBlock[i]); + m_builder.SetInsertPoint(exportXfbBlock[i]); Value *vertexIndices[3] = {}; @@ -6977,20 +7341,18 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { } for (unsigned j = 0; j < outVertsPerPrim; ++j) { - for (unsigned k = 0; k < xfbOutputExports.size(); ++k) { - const auto &xfbOutputExport = xfbOutputExports[k]; - if (xfbOutputExport.locInfo.streamId != i) + for (unsigned k = 0; k < xfbExports.size(); ++k) { + const auto &xfbExport = xfbExports[k]; + if (xfbExport.locInfo.streamId != i) continue; // Output not belong to this stream - 
auto outputValue = - readGsOutput(xfbOutputExport.numElements > 1 - ? FixedVectorType::get(m_builder.getFloatTy(), xfbOutputExport.numElements) - : m_builder.getFloatTy(), - xfbOutputExport.locInfo.location, xfbOutputExport.locInfo.component, i, - calcVertexItemOffset(i, vertexIndices[j])); + auto exportValue = readGsOutput( + xfbExport.numElements > 1 ? FixedVectorType::get(m_builder.getFloatTy(), xfbExport.numElements) + : m_builder.getFloatTy(), + xfbExport.locInfo.location, xfbExport.locInfo.component, i, calcVertexItemOffset(i, vertexIndices[j])); unsigned format = 0; - switch (xfbOutputExport.numElements) { + switch (xfbExport.numElements) { case 1: format = BUF_FORMAT_32_FLOAT; break; @@ -7018,26 +7380,26 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { Value *vertexOffset = m_builder.CreateAdd( m_builder.CreateMul(m_nggInputs.threadIdInSubgroup, m_builder.getInt32(outVertsPerPrim)), m_builder.getInt32(j)); - vertexOffset = m_builder.CreateMul(vertexOffset, m_builder.getInt32(xfbStrides[xfbOutputExport.xfbBuffer])); + vertexOffset = m_builder.CreateMul(vertexOffset, m_builder.getInt32(xfbStrides[xfbExport.xfbBuffer])); // xfbOutputOffset = vertexOffset + xfbOffset - Value *xfbOutputOffset = m_builder.CreateAdd(vertexOffset, m_builder.getInt32(xfbOutputExport.xfbOffset)); + Value *xfbOutputOffset = m_builder.CreateAdd(vertexOffset, m_builder.getInt32(xfbExport.xfbOffset)); m_builder.CreateIntrinsic(m_builder.getVoidTy(), Intrinsic::amdgcn_raw_tbuffer_store, - {outputValue, // vdata - m_streamOutBufDescs[xfbOutputExport.xfbBuffer], // rsrc - xfbOutputOffset, // offset - streamOutOffsets[xfbOutputExport.xfbBuffer], // soffset - m_builder.getInt32(format), // format - m_builder.getInt32(coherent.u32All)}); // auxiliary data + {exportValue, // vdata + m_streamOutBufDescs[xfbExport.xfbBuffer], // rsrc + xfbOutputOffset, // offset + streamOutOffsets[xfbExport.xfbBuffer], // soffset + m_builder.getInt32(format), // format + 
m_builder.getInt32(coherent.u32All)}); // auxiliary data } } - m_builder.CreateBr(endExportXfbOutputBlock[i]); + m_builder.CreateBr(endExportXfbBlock[i]); } - // Construct ".endExportXfbOutputInStream[N]" block + // Construct ".endExportXfbInStream[N]" block { - m_builder.SetInsertPoint(endExportXfbOutputBlock[i]); + m_builder.SetInsertPoint(endExportXfbBlock[i]); if (i != lastActiveStream) { unsigned nextActiveStream = i + 1; @@ -7048,16 +7410,14 @@ void NggPrimShader::processSwXfbWithGs(ArrayRef args) { assert(nextActiveStream <= lastActiveStream); auto validPrimitive = m_builder.CreateICmpULT(m_nggInputs.threadIdInSubgroup, numPrimsToWrite[nextActiveStream]); - m_builder.CreateCondBr(validPrimitive, exportXfbOutputBlock[nextActiveStream], - endExportXfbOutputBlock[nextActiveStream]); + m_builder.CreateCondBr(validPrimitive, exportXfbBlock[nextActiveStream], endExportXfbBlock[nextActiveStream]); } } } } // ===================================================================================================================== -// Prepare SW emulated transform feedback. Update various counter relevant to transform feedback, such as dwordsWritten, -// primsNeed, and primsWritten. +// Prepare SW emulated XFB. Update various counter relevant to XFB, such as dwordsWritten, primsNeed, and primsWritten. // // @param primCountInSubgroup : Number of primitives in subgroup for each vertex stream void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { @@ -7074,7 +7434,7 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { for (unsigned j = 0; j < MaxGsStreams; ++j) { if ((streamXfbBuffers[j] & (1 << i)) != 0) { - // NOTE: According to GLSL spec, all outputs assigned to a given transform feedback buffer are required to + // NOTE: According to GLSL spec, all outputs assigned to a given XFB buffer are required to // come from a single vertex stream. 
xfbBufferToStream[i] = j; break; @@ -7121,7 +7481,7 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { if (!bufferActive[i]) - continue; // Transform feedback buffer is inactive + continue; // XFB buffer is inactive if (firstActiveBuffer == InvalidValue) firstActiveBuffer = i; @@ -7136,6 +7496,10 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { Value *dwordsWritten[MaxTransformFeedbackBuffers] = {}; + // NOTE: HW requires us to insert s_waitcnt lgkmcnt(0) following each GDS ordered count instruction. + // This is to avoid outstanding GDS instructions, which cause problems in GDS synchronization. + SyncScope::ID workgroupScope = m_builder.getContext().getOrInsertSyncScopeID("workgroup"); + // Calculate numPrimsToWrite for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { if (!bufferActive[i]) @@ -7157,12 +7521,14 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { m_builder.getFalse(), // wave release m_builder.getFalse(), // wave done }); + m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); } else { // ds_add_gs_reg dwordsWritten[i] = m_builder.CreateIntrinsic(Intrinsic::amdgcn_ds_add_gs_reg_rtn, m_builder.getInt32Ty(), {m_builder.getInt32(0), // value to add m_builder.getInt32((GDS_STRMOUT_DWORDS_WRITTEN_0 + i) << 2)}); // count index + m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); } // NUM_RECORDS = SQ_BUF_RSRC_WORD2 @@ -7200,37 +7566,45 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { m_builder.getTrue(), // wave release m_builder.getTrue(), // wave done }); + m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); } else { // ds_add_gs_reg dwordsWritten[i] = m_builder.CreateIntrinsic(Intrinsic::amdgcn_ds_add_gs_reg_rtn, dwordsToWrite->getType(), {dwordsToWrite, // value to add m_builder.getInt32((GDS_STRMOUT_DWORDS_WRITTEN_0 + i) << 2)}); // count index + 
m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); } } - // Store transform feedback statistics info to LDS and GDS + // Update GDS primitive statistics counters const unsigned regionStart = getLdsRegionStart(PrimShaderLdsRegion::XfbStats); - for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { - if (!bufferActive[i]) - continue; - - writeValueToLds(dwordsWritten[i], m_builder.getInt32(regionStart + i)); - } - for (unsigned i = 0; i < MaxGsStreams; ++i) { if (!m_pipelineState->isVertexStreamActive(i)) continue; - writeValueToLds(numPrimsToWrite[i], m_builder.getInt32(regionStart + MaxTransformFeedbackBuffers + i)); - m_builder.CreateIntrinsic(Intrinsic::amdgcn_ds_add_gs_reg_rtn, primCountInSubgroup[i]->getType(), {primCountInSubgroup[i], // value to add m_builder.getInt32((GDS_STRMOUT_PRIMS_NEEDED_0 + 2 * i) << 2)}); // count index + m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); m_builder.CreateIntrinsic(Intrinsic::amdgcn_ds_add_gs_reg_rtn, numPrimsToWrite[i]->getType(), {numPrimsToWrite[i], // value to add m_builder.getInt32((GDS_STRMOUT_PRIMS_WRITTEN_0 + 2 * i) << 2)}); // count index + m_builder.CreateFence(AtomicOrdering::Release, workgroupScope); + } + + // Store XFB statistics info to LDS + for (unsigned i = 0; i < MaxTransformFeedbackBuffers; ++i) { + if (!bufferActive[i]) + continue; + writeValueToLds(dwordsWritten[i], m_builder.getInt32(regionStart + i)); + } + + for (unsigned i = 0; i < MaxGsStreams; ++i) { + if (!m_pipelineState->isVertexStreamActive(i)) + continue; + writeValueToLds(numPrimsToWrite[i], m_builder.getInt32(regionStart + MaxTransformFeedbackBuffers + i)); } m_builder.CreateBr(endPrepareXfbBlock); @@ -7248,290 +7622,6 @@ void NggPrimShader::prepareSwXfb(ArrayRef primCountInSubgroup) { llvm_unreachable("Not implemented!"); } -// ===================================================================================================================== -// Fetches transform feedback outputs by creating a 
fetcher cloned from the target function or just mutating -// the target function and running it after that. Meanwhile, we collect the transform feedback export info. -// -// @param target : Target function to process SW emulated transform feedback -// @param args : Arguments of primitive shader entry-point -// @param [out] xfbOutputExports : Export info of transform feedback outputs -Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args, - SmallVectorImpl &xfbOutputExports) { - assert(m_pipelineState->enableSwXfb()); - - auto resUsage = m_pipelineState->getShaderResourceUsage( - m_hasGs ? ShaderStage::Geometry : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)); - const unsigned xfbOutputCount = resUsage->inOutUsage.xfbExpCount; - - // Skip following handling if transform feedback output is empty - if (xfbOutputCount == 0) - return nullptr; - - // - // Clone the target function or just mutate the target function to fetch transform feedback outputs - // - auto savedInsertPos = m_builder.saveIP(); - - // We don't clone the target function if we are in passthrough mode without GS - const bool makeClone = m_hasGs || !m_nggControl->passthroughMode; - - // Clone or mutate the target function - xfbOutputExports.resize(xfbOutputCount); - - // NOTE: For non-GS transform feedback, the return type is represented as an array of transform feedback outputs; for - // GS transform feedback, the return type is void. This is because output values must be loaded by GS read output - // call. Thus, we don't have to return output values. Instead, we recode the location in transform feedback export - // info and fetch them later. - Type *xfbOutputsTy = ArrayType::get(FixedVectorType::get(m_builder.getInt32Ty(), 4), xfbOutputCount); - Type *xfbReturnTy = m_hasGs ? 
m_builder.getVoidTy() : xfbOutputsTy; - - Function *xfbFetcher = target; - if (makeClone) { - auto xfbFetcherTy = FunctionType::get(xfbReturnTy, target->getFunctionType()->params(), false); - xfbFetcher = Function::Create(xfbFetcherTy, target->getLinkage(), "", target->getParent()); - - ValueToValueMapTy valueMap; - - Argument *newArg = xfbFetcher->arg_begin(); - for (Argument &arg : target->args()) - valueMap[&arg] = newArg++; - - SmallVector retInsts; - CloneFunctionInto(xfbFetcher, target, valueMap, CloneFunctionChangeType::LocalChangesOnly, retInsts); - xfbFetcher->setName(NggXfbFetcher); - } else { - mutateToExportVertex(target); - - xfbFetcher = addFunctionArgs(target, xfbReturnTy, {}, {}, 0); - - // Original target function is no longer needed - assert(target->use_empty()); - target->eraseFromParent(); - target = nullptr; - } - - // Find the return block - BasicBlock *retBlock = nullptr; - for (BasicBlock &block : *xfbFetcher) { - auto retInst = dyn_cast(block.getTerminator()); - if (retInst) { - retInst->dropAllReferences(); - retInst->eraseFromParent(); - - retBlock = █ - break; - } - } - assert(retBlock); - m_builder.SetInsertPoint(retBlock); - - // Visit XFB writes and vertex position/attribute exports by lowering or removing them, and mutating the return type - SmallVector callsToRemove; - - Value *xfbOutputs = PoisonValue::get(xfbOutputsTy); - unsigned outputIndex = 0; - unsigned offsetInVertex = 0; - - struct Payload { - NggPrimShader &self; - Value *&xfbOutputs; - unsigned &outputIndex; - unsigned &offsetInVertex; - SmallVectorImpl &xfbOutputExports; - SmallVectorImpl &callsToRemove; - }; - Payload payload = {*this, xfbOutputs, outputIndex, offsetInVertex, xfbOutputExports, callsToRemove}; - - static const auto visitor = - llvm_dialects::VisitorBuilder() - .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) - .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { - auto &builder = payload.self.m_builder; - - auto xfbBuffer = 
writeXfbOutputOp.getXfbBuffer(); - auto xfbOffset = writeXfbOutputOp.getXfbOffset(); - auto outputValue = writeXfbOutputOp.getOutputValue(); - assert(outputValue->getType()->getScalarSizeInBits() == 32); - - const unsigned numElements = outputValue->getType()->isVectorTy() - ? cast(outputValue->getType())->getNumElements() - : 1; - assert(numElements <= 4); - - // Those values are just for GS - auto streamId = InvalidValue; - unsigned location = InvalidValue; - unsigned component = InvalidValue; - - if (payload.self.m_hasGs) { - // NOTE: For GS, the output value must be loaded by NggReadGsOutputOp. This is generated by copy - // shader. - NggReadGsOutputOp *readGsOutputOp = dyn_cast(outputValue); - streamId = writeXfbOutputOp.getStreamId(); - assert(streamId == readGsOutputOp->getStreamId()); // Stream ID must match - location = readGsOutputOp->getLocation(); - component = readGsOutputOp->getComponent(); - } else { - // If the output value is floating point, cast it to integer type - if (outputValue->getType()->isFPOrFPVectorTy()) { - if (numElements == 1) { - outputValue = builder.CreateBitCast(outputValue, builder.getInt32Ty()); - } else { - outputValue = - builder.CreateBitCast(outputValue, FixedVectorType::get(builder.getInt32Ty(), numElements)); - } - } - - // Always pad the output value to <4 x i32> - if (numElements == 1) { - outputValue = - builder.CreateInsertElement(PoisonValue::get(FixedVectorType::get(builder.getInt32Ty(), 4)), - outputValue, static_cast(0)); - } else if (numElements < 4) { - outputValue = builder.CreateShuffleVector(outputValue, PoisonValue::get(outputValue->getType()), - ArrayRef({0U, 1U, 2U, 3U})); - } - } - - // For VS/TES, return the output value - if (!payload.self.m_hasGs) - payload.xfbOutputs = builder.CreateInsertValue(payload.xfbOutputs, outputValue, payload.outputIndex); - - // Collect export info - payload.xfbOutputExports[payload.outputIndex].xfbBuffer = xfbBuffer; - payload.xfbOutputExports[payload.outputIndex].xfbOffset = 
xfbOffset; - payload.xfbOutputExports[payload.outputIndex].numElements = numElements; - - if (payload.self.m_hasGs) { - // Update fields for GS to use - payload.xfbOutputExports[payload.outputIndex].locInfo.streamId = streamId; - payload.xfbOutputExports[payload.outputIndex].locInfo.location = location; - payload.xfbOutputExports[payload.outputIndex].locInfo.component = component; - } else { - // Update the field for ES to use - payload.xfbOutputExports[payload.outputIndex].offsetInVertex = payload.offsetInVertex; - payload.offsetInVertex += numElements; // Increment the offset - } - - ++payload.outputIndex; - - payload.callsToRemove.push_back(&writeXfbOutputOp); - }) - .add([](Payload &payload, NggExportPositionOp &exportPositionOp) { - payload.callsToRemove.push_back(&exportPositionOp); - }) - .add([](Payload &payload, NggExportAttributeOp &exportAttributeOp) { - payload.callsToRemove.push_back(&exportAttributeOp); - }) - .build(); - visitor.visit(payload, *xfbFetcher); - - assert(outputIndex == xfbOutputCount); // Visit all transform feedback export calls - - m_builder.CreateRet(xfbOutputs); - - // Remove XFB writes in original target function - if (makeClone) { - assert(target); - - struct Payload { - SmallVectorImpl &callsToRemove; - }; - Payload payload = {callsToRemove}; - - static const auto visitor = llvm_dialects::VisitorBuilder() - .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) - .add([](Payload &payload, WriteXfbOutputOp &writeXfbOutputOp) { - payload.callsToRemove.push_back(&writeXfbOutputOp); - }) - .build(); - visitor.visit(payload, *target); - } - - for (auto call : callsToRemove) { - call->dropAllReferences(); - call->eraseFromParent(); - } - - m_builder.restoreIP(savedInsertPos); - - // - // Run transform feedback fetch function - // - if (m_hasGs) { - // Copy shader has fixed argument layout - return m_builder.CreateCall(xfbFetcher, {m_nggInputs.threadIdInSubgroup}); - } - - Value *offChipLdsBase = 
args[ShaderMerger::getSpecialSgprInputIndex(m_gfxIp, EsGs::OffChipLdsBase)]; - offChipLdsBase->setName("offChipLdsBase"); - - Value *userData = args[NumSpecialSgprInputs]; - - ArrayRef vgprArgs(args.begin() + NumSpecialSgprInputs + 1, args.end()); - - Value *tessCoordX = nullptr; - Value *tessCoordY = nullptr; - Value *relPatchId = nullptr; - Value *patchId = nullptr; - - Value *vertexId = nullptr; - Value *relVertexId = PoisonValue::get(m_builder.getInt32Ty()); - // NOTE: VS primitive ID for NGG is specially obtained from primitive ID distribution. - Value *vsPrimitiveId = m_distributedPrimitiveId ? m_distributedPrimitiveId : PoisonValue::get(m_builder.getInt32Ty()); - Value *instanceId = nullptr; - - if (m_gfxIp.major <= 11) { - if (m_hasTes) { - tessCoordX = vgprArgs[5]; - tessCoordY = vgprArgs[6]; - relPatchId = vgprArgs[7]; - patchId = vgprArgs[8]; - } else { - vertexId = vgprArgs[5]; - instanceId = vgprArgs[8]; - } - } else { - llvm_unreachable("Not implemented!"); - } - - SmallVector xfbFetcherArgs; - - // If we don't clone the target function, we are going to run it and handle vertex attribute through memory here. - if (!makeClone) { - if (!m_pipelineState->exportAttributeByExportInstruction()) { - if (!m_hasGs) // For GS, ATM is done in copy shader - appendAttributeThroughMemoryArguments(xfbFetcherArgs); - } - } - - // Set up user data SGPRs - const unsigned userDataCount = - m_pipelineState->getShaderInterfaceData(m_hasTes ? 
ShaderStage::TessEval : ShaderStage::Vertex)->userDataCount; - appendUserData(xfbFetcherArgs, xfbFetcher, userData, userDataCount); - - if (m_hasTes) { - // Set up system value SGPRs - xfbFetcherArgs.push_back(offChipLdsBase); - - // Set up system value VGPRs - xfbFetcherArgs.push_back(tessCoordX); - xfbFetcherArgs.push_back(tessCoordY); - xfbFetcherArgs.push_back(relPatchId); - xfbFetcherArgs.push_back(patchId); - } else { - // Set up system value VGPRs - xfbFetcherArgs.push_back(vertexId); - xfbFetcherArgs.push_back(relVertexId); - xfbFetcherArgs.push_back(vsPrimitiveId); - xfbFetcherArgs.push_back(instanceId); - } - - assert(xfbFetcherArgs.size() == xfbFetcher->arg_size()); // Must have visit all arguments - - return m_builder.CreateCall(xfbFetcher, xfbFetcherArgs); -} - // ===================================================================================================================== // Collect primitive statistics (primitive statistics counting) and update the values in HW counters. 
void NggPrimShader::collectPrimitiveStats() { @@ -7770,11 +7860,11 @@ void NggPrimShader::collectPrimitiveStats() { } // ===================================================================================================================== -// Reads transform feedback output from LDS +// Reads XFB output from LDS // // @param readDataTy : Data read from LDS // @param vertexIndex: Relative vertex index in NGG subgroup -// @param offsetInVertex : Output offset within all transform feedback outputs of a vertex (in dwords) +// @param offsetInVertex : Output offset within all XFB outputs of a vertex (in dwords) Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, unsigned offsetInVertex) { assert(m_pipelineState->enableSwXfb()); // SW-emulated stream-out must be enabled assert(!m_hasGs); @@ -7789,7 +7879,7 @@ Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, return readValueFromLds(readDataTy, ldsOffset); } - // NOTE: For NGG culling mode, transform feedback outputs are part of vertex cull info. + // NOTE: For NGG culling mode, XFB outputs are part of vertex cull info. 
const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::VertexCullInfo); Value *ldsOffset = m_builder.CreateAdd( vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + offsetInVertex)); @@ -7797,11 +7887,11 @@ Value *NggPrimShader::readXfbOutputFromLds(Type *readDataTy, Value *vertexIndex, } // ===================================================================================================================== -// Writes transform feedback output from LDS +// Writes XFB output to LDS // // @param writeData : Data written to LDS // @param vertexIndex: Relative vertex index in NGG subgroup -// @param offsetInVertex : Output offset within all transform feedback outputs of a vertex (in dwords) +// @param offsetInVertex : Output offset within all XFB outputs of a vertex (in dwords) void NggPrimShader::writeXfbOutputToLds(Value *writeData, Value *vertexIndex, un return; } - // NOTE: For NGG culling mode, transform feedback outputs are part of vertex cull info. + // NOTE: For NGG culling mode, XFB outputs are part of vertex cull info.
const auto regionStart = getLdsRegionStart(PrimShaderLdsRegion::VertexCullInfo); Value *ldsOffset = m_builder.CreateAdd( vertexItemOffset, m_builder.getInt32(regionStart + m_vertCullInfoOffsets.xfbOutputs + offsetInVertex)); diff --git a/lgc/lowering/NggPrimShader.h b/lgc/lowering/NggPrimShader.h index 5d1cb13ab6..6b70653a2c 100644 --- a/lgc/lowering/NggPrimShader.h +++ b/lgc/lowering/NggPrimShader.h @@ -147,19 +147,27 @@ struct VertexCullInfoOffsets { unsigned relPatchId; }; -// Represents export info of a transform feedback output -struct XfbOutputExport { - unsigned xfbBuffer; // Transform feedback buffer - unsigned xfbOffset; // Transform feedback offset - unsigned numElements; // Number of output elements, valid range is [1,4] +// Represents export info of a vertex +struct VertexExport { + unsigned exportSlot; // Export slot + unsigned channelMask; // Channel mask + llvm::Value *exportValue; // Export values +}; + +// Represents export info of a XFB output +struct XfbExport { + unsigned xfbBuffer; // XFB buffer + unsigned xfbOffset; // XFB offset + unsigned numElements; // Number of export elements, valid range is [1,4] // For ES only - unsigned offsetInVertex; // Offset of an output within all transform feedback outputs of a vertex + unsigned offsetInVertex; // Offset of a XFB output within all XFB outputs of a vertex // For GS only struct { - unsigned streamId; // Output stream ID - unsigned location; // Output location - unsigned component; // Output component within a location - } locInfo; // Output location info in GS-VS ring + unsigned streamId; // Output stream ID + unsigned location; // Output location + unsigned component; // Output component within a location + } locInfo; // Output location info in GS-VS ring + llvm::Value *exportValue; // Export value of a XFB output }; // Enumerates the LDS regions used by primitive shader @@ -238,13 +246,9 @@ class NggPrimShader { void earlyExitWithDummyExport(); void runEs(llvm::ArrayRef args); - llvm::Value 
*runPartEs(llvm::ArrayRef args, llvm::Value *position = nullptr); void splitEs(); - void runGs(llvm::ArrayRef args); void mutateGs(); - - void runCopyShader(llvm::ArrayRef args); void mutateCopyShader(); void appendUserData(llvm::SmallVectorImpl &args, llvm::Function *target, llvm::Value *userData, @@ -298,17 +302,23 @@ class NggPrimShader { llvm::Value *fetchCullDistanceSignMask(llvm::Value *vertexIndex); llvm::Value *calcVertexItemOffset(unsigned streamId, llvm::Value *vertexIndex); - void appendAttributeThroughMemoryArguments(llvm::SmallVectorImpl &args); - void mutateToExportVertex(llvm::Function *&target); - void exportPosition(unsigned exportSlot, llvm::ArrayRef exportValues, bool lastExport); - void exportAttribute(unsigned exportSlot, llvm::ArrayRef exportValues, llvm::Value *attribRingBufDesc, - llvm::Value *attribRingBaseOffset, llvm::Value *vertexIndex); - - void processSwXfb(llvm::ArrayRef args); - void processSwXfbWithGs(llvm::ArrayRef args); + llvm::Function *makeExportCollector(llvm::Function *&fromFunc, bool makeClone, + llvm::SmallVectorImpl *positionExports, + llvm::SmallVectorImpl *attributeExports, + llvm::SmallVectorImpl *xfbExports); + void collectExports(llvm::ArrayRef args, llvm::Function *&fromFunc, bool makeClone, + llvm::SmallVectorImpl *positionExports, + llvm::SmallVectorImpl *attributeExports, + llvm::SmallVectorImpl *xfbExports); + void createPhiForExports(llvm::SmallVectorImpl *positionExports, + llvm::SmallVectorImpl *attributeExports, + llvm::SmallVectorImpl *xfbExports); + void exportPositions(const llvm::SmallVectorImpl &positionExports); + void exportAttributes(const llvm::SmallVectorImpl &attributeExports); + + void processSwXfb(llvm::ArrayRef args, const llvm::SmallVectorImpl &xfbExports); + void processSwXfbWithGs(llvm::ArrayRef args, const llvm::SmallVectorImpl &xfbExports); void prepareSwXfb(llvm::ArrayRef primCountInSubgroup); - llvm::Value *fetchXfbOutput(llvm::Function *target, llvm::ArrayRef args, - 
llvm::SmallVectorImpl &xfbOutputExports); llvm::Value *readXfbOutputFromLds(llvm::Type *readDataTy, llvm::Value *vertexIndex, unsigned offsetInVertex); void writeXfbOutputToLds(llvm::Value *writeData, llvm::Value *vertexIndex, unsigned offsetInVertex); @@ -373,9 +383,10 @@ class NggPrimShader { // ES handlers struct { - llvm::Function *main; // ES main function - llvm::Function *cullDataFetcher; // Part ES to fetch cull data (position and cull distance) - llvm::Function *vertexExporter; // Part ES to do deferred vertex exporting + llvm::Function *main; // ES main function + // Part ES functions, the first part contains cull data exports (position and cull distance) and the second part + // contains other remaining exports. + std::pair part; } m_esHandlers = {}; // GS handlers diff --git a/lgc/lowering/PassRegistry.inc b/lgc/lowering/PassRegistry.inc index 71aecd4119..862ac9735e 100644 --- a/lgc/lowering/PassRegistry.inc +++ b/lgc/lowering/PassRegistry.inc @@ -83,7 +83,7 @@ LLPC_MODULE_PASS("lgc-frag-color-export", LowerFragmentColorExport) LLPC_MODULE_PASS("lgc-lower-debug-printf", LowerDebugPrintf) LLPC_MODULE_PASS("lgc-lower-desc", LowerDesc) -#if LLPC_BUILD_STRIX1 +#if LLPC_BUILD_STRIX1 || LLPC_BUILD_STRIX_HALO LLPC_MODULE_PASS("lgc-workaround-ds-subdword-write", WorkaroundDsSubdwordWrite) #endif diff --git a/lgc/lowering/PreparePipelineAbi.cpp b/lgc/lowering/PreparePipelineAbi.cpp index fc3e633cd1..25a323e4d4 100644 --- a/lgc/lowering/PreparePipelineAbi.cpp +++ b/lgc/lowering/PreparePipelineAbi.cpp @@ -32,6 +32,7 @@ #include "MeshTaskShader.h" #include "RegisterMetadataBuilder.h" #include "ShaderMerger.h" +#include "lgc/Debug.h" #include "lgc/state/PalMetadata.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Pass.h" @@ -283,6 +284,21 @@ void PreparePipelineAbi::writeHsOutputs(PipelineState *pipelineState, Value *off // // Check if this patch could be discarded // + + // NOTE: Here, we dynamically set FP32 denorm mode to allow inout denorms. 
This is because TFs with denorm values + // will be flushed to zeros during this check if FP32 denorm mode is not set to allow denorms via FLOAT_MODE + // register field. + // + // MODE[7:4] = FP_DENORM, [5:4] = Single precision denorm mode, [7:6]= Double precision and FP16 denormal mode + // Mode: + // 0 = flush input and output denorms + // 1 = allow input denorms, flush output denorms + // 2 = flush input denorms, allow output denorms + // 3 = allow input and output denorms + static const unsigned HWRegMode = 1; + static const unsigned AllowInOutDenorms = 3; + builder.CreateSetReg(HWRegMode, 4, 4, builder.getInt32(AllowInOutDenorms)); + Value *minOuterTf = builder.CreateExtractElement(outerTf, static_cast(0)); for (unsigned i = 1; i < cast(outerTf->getType())->getNumElements(); ++i) minOuterTf = builder.CreateBinaryIntrinsic(Intrinsic::minnum, minOuterTf, builder.CreateExtractElement(outerTf, i)); @@ -294,78 +310,73 @@ void PreparePipelineAbi::writeHsOutputs(PipelineState *pipelineState, Value *off // Write HS outputs to off-chip LDS buffer if this patch is valid // auto &inOutUsage = pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->inOutUsage; - const auto &builtInUsage = pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; - const auto &hwConfig = inOutUsage.tcs.hwConfig; + const auto &nextInOutUsage = pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->inOutUsage; - // Check if we don't need to write this built-in to off-chip LDS buffer because it is only accessed by HS - auto checkBuiltInNotToWrite = [&](unsigned builtIn) { - if (pipelineState->getNextShaderStage(ShaderStage::TessControl) == ShaderStage::TessEval) { - auto nextInOutStage = pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->inOutUsage; - if (builtIn == BuiltInTessLevelOuter || builtIn == BuiltInTessLevelInner) { - if (inOutUsage.perPatchBuiltInOutputLocMap.count(builtIn) > 0 && - 
nextInOutStage.perPatchBuiltInInputLocMap.count(builtIn) == 0) - return true; - } else { - if (inOutUsage.builtInOutputLocMap.count(builtIn) > 0 && nextInOutStage.builtInInputLocMap.count(builtIn) == 0) - return true; - } - } - return false; - }; + const auto &builtInUsage = pipelineState->getShaderResourceUsage(ShaderStage::TessControl)->builtInUsage.tcs; + const auto &nextBuiltInUsage = pipelineState->getShaderResourceUsage(ShaderStage::TessEval)->builtInUsage.tes; - static const unsigned BufferFormatsGfx10[] = {BUF_FORMAT_32_FLOAT, BUF_FORMAT_32_32_FLOAT_GFX10, - BUF_FORMAT_32_32_32_FLOAT_GFX10, BUF_FORMAT_32_32_32_32_FLOAT_GFX10}; - static const unsigned BufferFormatsGfx11[] = {BUF_FORMAT_32_FLOAT, BUF_FORMAT_32_32_FLOAT_GFX11, - BUF_FORMAT_32_32_32_FLOAT_GFX11, BUF_FORMAT_32_32_32_32_FLOAT_GFX11}; + const auto &hwConfig = inOutUsage.tcs.hwConfig; + const bool hasTes = pipelineState->hasShaderStage(ShaderStage::TessEval); const auto gfxIp = pipelineState->getTargetInfo().getGfxIpVersion(); - ArrayRef bufferFormats(gfxIp.major == 10 ? BufferFormatsGfx10 : BufferFormatsGfx11); + const unsigned bufferFormat = + gfxIp.major >= 11 ? 
BUF_FORMAT_32_32_32_32_FLOAT_GFX11 : BUF_FORMAT_32_32_32_32_FLOAT_GFX10; CoherentFlag coherent = {}; if (gfxIp.major <= 11) { coherent.bits.glc = true; } - // Write per-vertex HS outputs to off-chip LDS buffer - if (inOutUsage.outputMapLocCount > 0) { - SmallDenseSet builtInLocsNotToWrite; - SmallDenseMap builtInLocsToTypes; + LLPC_OUTS("===============================================================================\n"); + LLPC_OUTS("// LLPC HS output write info\n\n"); - for (const auto &[builtIn, loc] : inOutUsage.builtInOutputLocMap) { - if (checkBuiltInNotToWrite(builtIn)) { - assert(inOutUsage.builtInOutputLocMap.count(builtIn) > 0); - builtInLocsNotToWrite.insert(inOutUsage.builtInOutputLocMap[builtIn]); - } else { - switch (builtIn) { - case BuiltInPosition: - builtInLocsToTypes[loc] = FixedVectorType::get(builder.getFloatTy(), 4); - break; - case BuiltInPointSize: - builtInLocsToTypes[loc] = builder.getFloatTy(); - break; - case BuiltInClipDistance: - case BuiltInCullDistance: { - const unsigned clipOrCullDistance = - builtIn == BuiltInClipDistance ? builtInUsage.clipDistance : builtInUsage.cullDistance; - assert(clipOrCullDistance > 0 && clipOrCullDistance <= 8); - - builtInLocsToTypes[loc] = clipOrCullDistance == 1 - ? builder.getFloatTy() - : FixedVectorType::get(builder.getFloatTy(), std::min(clipOrCullDistance, 4U)); - if (clipOrCullDistance > 4) { - builtInLocsToTypes[loc + 1] = clipOrCullDistance == 5 - ? builder.getFloatTy() - : FixedVectorType::get(builder.getFloatTy(), clipOrCullDistance - 4); - } - - break; + // HS output write info + struct HsOutputWriteInfo { + unsigned onChipLoc; // Location in on-chip LDS + unsigned builtIn; // Built-in ID, or InvalidValue for a generic output + }; + + // Write per-vertex HS outputs to off-chip LDS buffer (to next stage) + unsigned offChipLocCount = hasTes ? 
nextInOutUsage.inputMapLocCount : inOutUsage.outputMapLocCount; + if (offChipLocCount > 0) { + LLPC_OUTS("Per-vertex Outputs [OnChip, OffChip]:\n"); + + SmallDenseMap hsOutputWrites; + + // Check generic outputs + const auto &genericOffChipLocMap = hasTes ? nextInOutUsage.inputLocInfoMap : inOutUsage.outputLocInfoMap; + auto &genericOnChipLocMap = inOutUsage.outputLocInfoMap; + + for (const auto &[origLocInfo, offChipLocInfo] : genericOffChipLocMap) { + const unsigned offChipLoc = offChipLocInfo.getLocation(); + if (hsOutputWrites.count(offChipLoc) == 0) { + assert(genericOnChipLocMap.count(origLocInfo) > 0); + hsOutputWrites[offChipLoc].onChipLoc = genericOnChipLocMap[origLocInfo].getLocation(); + hsOutputWrites[offChipLoc].builtIn = InvalidValue; + } + } + + // Check built-in outputs + const auto &builtInOffChipLocMap = hasTes ? nextInOutUsage.builtInInputLocMap : inOutUsage.builtInOutputLocMap; + auto &builtInOnChipLocMap = inOutUsage.builtInOutputLocMap; + + for (const auto &[builtIn, offChipLoc] : builtInOffChipLocMap) { + assert(builtInOnChipLocMap.count(builtIn) > 0); + hsOutputWrites[offChipLoc].onChipLoc = builtInOnChipLocMap[builtIn]; + hsOutputWrites[offChipLoc].builtIn = builtIn; + + if (builtIn == BuiltInClipDistance || builtIn == BuiltInCullDistance) { + unsigned clipOrCullDistance = 0; + if (hasTes) { + clipOrCullDistance = + builtIn == BuiltInClipDistance ? nextBuiltInUsage.clipDistanceIn : nextBuiltInUsage.cullDistanceIn; + } else { + clipOrCullDistance = builtIn == BuiltInClipDistance ? 
builtInUsage.clipDistance : builtInUsage.cullDistance; } - case BuiltInViewportIndex: - case BuiltInLayer: - builtInLocsToTypes[loc] = builder.getInt32Ty(); - break; - default: - llvm_unreachable("Unexpected built-in"); - break; + assert(clipOrCullDistance > 0 && clipOrCullDistance <= 8); + + if (clipOrCullDistance > 4) { + hsOutputWrites[offChipLoc + 1].onChipLoc = builtInOnChipLocMap[builtIn] + 1; + hsOutputWrites[offChipLoc + 1].builtIn = builtIn; } } } @@ -382,60 +393,70 @@ void PreparePipelineAbi::writeHsOutputs(PipelineState *pipelineState, Value *off offChipLdsBaseOffset, builder.CreateMul(vertexIdx, builder.getInt32(hwConfig.offChip.outputVertexStride))); offChipLdsBaseOffset = builder.CreateAdd(offChipLdsBaseOffset, builder.getInt32(hwConfig.offChip.outputPatchStart)); - for (unsigned loc = 0; loc < inOutUsage.outputMapLocCount; ++loc) { - if (builtInLocsNotToWrite.count(loc) > 0) - continue; + for (unsigned offChipLoc = 0; offChipLoc < offChipLocCount; ++offChipLoc) { + if (hsOutputWrites.count(offChipLoc) == 0) + continue; // Skip the location if it is not recorded (unlinked pipeline) - Type *outputTy = FixedVectorType::get(builder.getInt32Ty(), 4); // <4 x i32> for generic outputs - if (builtInLocsToTypes.count(loc) > 0) - outputTy = builtInLocsToTypes[loc]; // Built-in outputs have known types + const unsigned onChipLoc = hsOutputWrites[offChipLoc].onChipLoc; + const unsigned builtIn = hsOutputWrites[offChipLoc].builtIn; - const unsigned numComponents = outputTy->isVectorTy() ? 
cast(outputTy)->getNumElements() : 1; + LLPC_OUTS("location = [" << onChipLoc << ", " << offChipLoc << "]"); + if (builtIn != InvalidValue) { + LLPC_OUTS(" (builtin = " << PipelineState::getBuiltInName(static_cast(builtIn)) << ")"); + } + LLPC_OUTS("\n"); // ldsOffset = baseOffset + attribOffset - auto attribOffset = builder.getInt32(4 * loc); + auto attribOffset = builder.getInt32(4 * onChipLoc); auto onChipLdsOffset = builder.CreateAdd(onChipLdsBaseOffset, attribOffset); - auto output = readValueFromLds(outputTy, onChipLdsOffset); + auto output = readValueFromLds(FixedVectorType::get(builder.getInt32Ty(), 4), onChipLdsOffset); + attribOffset = builder.getInt32(4 * offChipLoc); auto offChipLdsOffset = builder.CreateAdd(offChipLdsBaseOffset, attribOffset); offChipLdsOffset = builder.CreateMul(offChipLdsOffset, builder.getInt32(4)); // Convert to byte offset builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_raw_tbuffer_store, - {output, // vdata - offChipLdsDesc, // rsrc - offChipLdsOffset, // voffset - offChipLdsBase, // soffset - builder.getInt32(bufferFormats[numComponents - 1]), // format - builder.getInt32(coherent.u32All)}); // glc + {output, // vdata + offChipLdsDesc, // rsrc + offChipLdsOffset, // voffset + offChipLdsBase, // soffset + builder.getInt32(bufferFormat), // format + builder.getInt32(coherent.u32All)}); // glc } + + LLPC_OUTS("\n"); } - // Write per-patch HS outputs to off-chip LDS buffer - if (inOutUsage.perPatchOutputMapLocCount > 0) { - SmallDenseSet builtInLocsNotToWrite; - SmallDenseMap builtInLocsToTypes; + // Write per-patch HS outputs to off-chip LDS buffer (to next stage) + offChipLocCount = hasTes ? 
nextInOutUsage.perPatchInputMapLocCount : inOutUsage.perPatchOutputMapLocCount; + if (offChipLocCount > 0) { + LLPC_OUTS("Per-patch Outputs [OnChip, OffChip]:\n"); - for (const auto &[builtIn, loc] : inOutUsage.perPatchBuiltInOutputLocMap) { - if (checkBuiltInNotToWrite(builtIn)) { - assert(inOutUsage.perPatchBuiltInOutputLocMap.count(builtIn) > 0); - builtInLocsNotToWrite.insert(inOutUsage.perPatchBuiltInOutputLocMap[builtIn]); - } else { - Type *type = nullptr; - switch (builtIn) { - case BuiltInTessLevelOuter: - type = FixedVectorType::get(builder.getFloatTy(), 4); - break; - case BuiltInTessLevelInner: - type = FixedVectorType::get(builder.getFloatTy(), 2); - break; - default: - llvm_unreachable("Unexpected built-in"); - break; - } - builtInLocsToTypes[loc] = type; + SmallDenseMap hsOutputWrites; + + // Check generic outputs + const auto &genericOffChipLocMap = hasTes ? nextInOutUsage.perPatchInputLocMap : inOutUsage.perPatchOutputLocMap; + auto &genericOnChipLocMap = inOutUsage.perPatchOutputLocMap; + + for (const auto &[origLoc, offChipLoc] : genericOffChipLocMap) { + if (hsOutputWrites.count(offChipLoc) == 0) { + assert(genericOnChipLocMap.count(origLoc) > 0); + hsOutputWrites[offChipLoc].onChipLoc = genericOnChipLocMap[origLoc]; + hsOutputWrites[offChipLoc].builtIn = InvalidValue; } } + // Check built-in outputs + const auto &builtInOffChipLocMap = + hasTes ? 
nextInOutUsage.perPatchBuiltInInputLocMap : inOutUsage.perPatchBuiltInOutputLocMap; + auto &builtInOnChipLocMap = inOutUsage.perPatchBuiltInOutputLocMap; + + for (const auto &[builtIn, offChipLoc] : builtInOffChipLocMap) { + assert(builtInOnChipLocMap.count(builtIn) > 0); + hsOutputWrites[offChipLoc].onChipLoc = builtInOnChipLocMap[builtIn]; + hsOutputWrites[offChipLoc].builtIn = builtIn; + } + // baseOffset = patchConstStart + relPatchId * patchConstSize auto onChipLdsBaseOffset = builder.CreateMul(relPatchId, builder.getInt32(hwConfig.onChip.patchConstSize)); onChipLdsBaseOffset = builder.CreateAdd(onChipLdsBaseOffset, builder.getInt32(hwConfig.onChip.patchConstStart)); @@ -443,32 +464,38 @@ void PreparePipelineAbi::writeHsOutputs(PipelineState *pipelineState, Value *off auto offChipLdsBaseOffset = builder.CreateMul(relPatchId, builder.getInt32(hwConfig.offChip.patchConstSize)); offChipLdsBaseOffset = builder.CreateAdd(offChipLdsBaseOffset, builder.getInt32(hwConfig.offChip.patchConstStart)); - for (unsigned loc = 0; loc < inOutUsage.perPatchOutputMapLocCount; ++loc) { - if (builtInLocsNotToWrite.count(loc) > 0) - continue; + for (unsigned offChipLoc = 0; offChipLoc < offChipLocCount; ++offChipLoc) { + if (hsOutputWrites.count(offChipLoc) == 0) + continue; // Skip the location if it is not recorded (unlinked pipeline) - Type *outputTy = FixedVectorType::get(builder.getInt32Ty(), 4); // <4 x i32> for generic outputs - if (builtInLocsToTypes.count(loc) > 0) - outputTy = builtInLocsToTypes[loc]; // Built-in outputs have known types + const unsigned onChipLoc = hsOutputWrites[offChipLoc].onChipLoc; + const unsigned builtIn = hsOutputWrites[offChipLoc].builtIn; - const unsigned numComponents = outputTy->isVectorTy() ? 
cast(outputTy)->getNumElements() : 1; + LLPC_OUTS("location = [" << onChipLoc << ", " << offChipLoc << "]"); + if (builtIn != InvalidValue) { + LLPC_OUTS(" (builtin = " << PipelineState::getBuiltInName(static_cast(builtIn)) << ")"); + } + LLPC_OUTS("\n"); // ldsOffset = baseOffset + attribOffset - auto attribOffset = builder.getInt32(4 * loc); + auto attribOffset = builder.getInt32(4 * onChipLoc); auto onChipLdsOffset = builder.CreateAdd(onChipLdsBaseOffset, attribOffset); - auto output = readValueFromLds(outputTy, onChipLdsOffset); + auto output = readValueFromLds(FixedVectorType::get(builder.getInt32Ty(), 4), onChipLdsOffset); + attribOffset = builder.getInt32(4 * offChipLoc); auto offChipLdsOffset = builder.CreateAdd(offChipLdsBaseOffset, attribOffset); offChipLdsOffset = builder.CreateMul(offChipLdsOffset, builder.getInt32(4)); // Convert to byte offset builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_raw_tbuffer_store, - {output, // vdata - offChipLdsDesc, // rsrc - offChipLdsOffset, // voffset - offChipLdsBase, // soffset - builder.getInt32(bufferFormats[numComponents - 1]), // format - builder.getInt32(coherent.u32All)}); // glc + {output, // vdata + offChipLdsDesc, // rsrc + offChipLdsOffset, // voffset + offChipLdsBase, // soffset + builder.getInt32(bufferFormat), // format + builder.getInt32(coherent.u32All)}); // glc } + + LLPC_OUTS("\n"); } } diff --git a/lgc/lowering/RegisterMetadataBuilder.cpp b/lgc/lowering/RegisterMetadataBuilder.cpp index 8a2df10879..0e98d11ce4 100644 --- a/lgc/lowering/RegisterMetadataBuilder.cpp +++ b/lgc/lowering/RegisterMetadataBuilder.cpp @@ -551,13 +551,12 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VgtDrawPrimPayloadEn] = hasPrimitivePayload; // Pipeline metadata: mesh_linear_dispatch_from_task - bool meshLinearDispatchFromTask = false; if (m_hasTask) { - meshLinearDispatchFromTask = + const bool meshLinearDispatchFromTask = 
m_pipelineState->getShaderResourceUsage(ShaderStage::Task)->builtInUsage.task.meshLinearDispatch; + getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MeshLinearDispatchFromTask] = + meshLinearDispatchFromTask; } - getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MeshLinearDispatchFromTask] = - meshLinearDispatchFromTask; if (m_gfxIp.major >= 11) { // SPI_SHADER_GS_MESHLET_DIM @@ -1171,6 +1170,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { bool useViewportIndex = false; bool useViewportIndexImplicitly = false; bool useShadingRate = false; + bool useEdgeFlag = false; unsigned clipDistanceCount = 0; unsigned cullDistanceCount = 0; @@ -1238,6 +1238,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { useLayer = builtInUsage.layer; useViewportIndex = builtInUsage.viewportIndex; useShadingRate = builtInUsage.primitiveShadingRate; + useEdgeFlag = builtInUsage.edgeFlag; clipDistanceCount = builtInUsage.clipDistance; cullDistanceCount = builtInUsage.cullDistance; @@ -1299,7 +1300,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { bool miscExport = usePointSize; if (!meshPipeline) { // NOTE: Those built-ins are exported through primitive payload for mesh pipeline rather than vertex position data. - miscExport |= useLayer || useViewportIndex || useShadingRate; + miscExport |= useLayer || useViewportIndex || useShadingRate || useEdgeFlag; } if (miscExport) { @@ -1316,6 +1317,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { // data. 
paClVsOutCntl[Util::Abi::PaClVsOutCntlMetadataKey::UseVtxRenderTargetIndx] = useLayer; paClVsOutCntl[Util::Abi::PaClVsOutCntlMetadataKey::UseVtxViewportIndx] = useViewportIndex; + paClVsOutCntl[Util::Abi::PaClVsOutCntlMetadataKey::UseVtxEdgeFlag] = useEdgeFlag; if (useShadingRate) { assert(m_gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ diff --git a/lgc/lowering/SetupTargetFeatures.cpp b/lgc/lowering/SetupTargetFeatures.cpp index fdd7ce5e44..f15d45147e 100644 --- a/lgc/lowering/SetupTargetFeatures.cpp +++ b/lgc/lowering/SetupTargetFeatures.cpp @@ -102,13 +102,22 @@ void SetUpTargetFeatures::setupTargetFeatures(Module *module) { } if (isShaderEntryPoint(&*func)) { - bool useSiScheduler = m_pipelineState->getShaderOptions(shaderStage.value()).useSiScheduler; - if (useSiScheduler) { + const ShaderOptions &options = m_pipelineState->getShaderOptions(shaderStage.value()); + if (options.useSiScheduler) { // It was found that enabling both SIScheduler and SIFormClauses was bad on one particular // game. So we disable the latter here. That only affects XNACK targets. targetFeatures += ",+si-scheduler"; builder.addAttribute("amdgpu-max-memory-clause", "1"); } + + LlvmScheduleStrategy schedStrategy = options.scheduleStrategy; + if (schedStrategy == LlvmScheduleStrategy::MaxMemoryClause) { + builder.addAttribute("amdgpu-sched-strategy", "max-memory-clause"); + // Use a more aggressive value than the default value. This helps clustering more instructions. 
+ builder.addAttribute("amdgpu-max-memory-cluster-dwords", "32"); + } else if (schedStrategy == LlvmScheduleStrategy::MaxIlp) { + builder.addAttribute("amdgpu-sched-strategy", "max-ilp"); + } } auto callingConv = func->getCallingConv(); diff --git a/lgc/lowering/ShaderMerger.cpp b/lgc/lowering/ShaderMerger.cpp index d6179c690f..561af503ff 100644 --- a/lgc/lowering/ShaderMerger.cpp +++ b/lgc/lowering/ShaderMerger.cpp @@ -1015,12 +1015,6 @@ void ShaderMerger::storeTessFactorsAndHsOutputsWithOpt(Value *threadIdInWave, Va // The processing is something like this: // // OPTIMIZED_TF_STORE_AND_HS_OUTPUTS_STORE() { - // if (threadIdInWave < hsVertexCount) { - // Read TFs from LDS (each thread corresponds to an output vertex) - // if (outerTfs > 0.0) - // Write HS outputs to off-chip LDS buffer - // } - // // Read hsPatchCount from LDS // // if (threadIdInGroup < hsPatchCount) { @@ -1048,6 +1042,12 @@ void ShaderMerger::storeTessFactorsAndHsOutputsWithOpt(Value *threadIdInWave, Va // Write TFs to buffer // } // } + // + // if (threadIdInWave < hsVertexCount) { + // Read TFs from LDS (each thread corresponds to an output vertex) + // if (outerTfs > 0.0) + // Write HS outputs to off-chip LDS buffer + // } // } // diff --git a/lgc/lowering/StructurizeBuffers.cpp b/lgc/lowering/StructurizeBuffers.cpp index 4d96d3af55..4245b0da07 100644 --- a/lgc/lowering/StructurizeBuffers.cpp +++ b/lgc/lowering/StructurizeBuffers.cpp @@ -182,7 +182,7 @@ bool StructurizeBuffersImpl::run() { strided = m_builder.create(strided, bufferIndexOp->getIndex()); toRemove.push_back(bufferIndexOp); - CompilerUtils::replaceAllPointerUses(bufferIndexOp, strided, toRemove); + compilerutils::replaceAllPointerUses(bufferIndexOp, strided, toRemove); } } } diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index ffe4ad31eb..7d1e9172ca 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -1977,13 +1977,6 @@ void PipelineState::initializeInOutPackState() { 
m_outputPackState[ShaderStage::TessEval] = true; m_outputPackState[ShaderStage::Geometry] = true; - // NOTE: For mesh shader, we don't do in-out packing currently in that mesh shader could emit per-vertex outputs - // and per-primitive outputs, which introduces additional complexity and this complexity increases with the - // involvement of dynamic indexing. - if (hasShaderStage(ShaderStage::Mesh)) { - m_outputPackState[ShaderStage::Mesh] = false; - m_inputPackState[ShaderStage::Fragment] = false; - } } else { // For unlinked shaders, we can do in-out packing if the pipeline has two adjacent shaders. // We are assuming that if any of the vertex processing, then the vertex processing stages are complete. For @@ -2001,6 +1994,13 @@ void PipelineState::initializeInOutPackState() { m_outputPackState[*preStage] = true; } } + // NOTE: For mesh shader, we don't do in-out packing currently in that mesh shader could emit per-vertex outputs + // and per-primitive outputs, which introduces additional complexity and this complexity increases with the + // involvement of dynamic indexing. + if (hasShaderStage(ShaderStage::Mesh)) { + m_outputPackState[ShaderStage::Mesh] = false; + m_inputPackState[ShaderStage::Fragment] = false; + } } // ===================================================================================================================== diff --git a/lgc/state/TargetInfo.cpp b/lgc/state/TargetInfo.cpp index fb59c8abcf..560415c647 100644 --- a/lgc/state/TargetInfo.cpp +++ b/lgc/state/TargetInfo.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -373,6 +373,20 @@ static void setGfx115FInfo(TargetInfo *targetInfo) { } #endif +#if LLPC_BUILD_STRIX_HALO +// gfx1151 +// +// @param [in/out] targetInfo : Target info +static void setGfx1151Info(TargetInfo *targetInfo) { + setGfx11Info(targetInfo); + + targetInfo->getGpuWorkarounds().gfx11.waAtmPrecedesPos = 1; + + targetInfo->getGpuProperty().numShaderEngines = 1; + targetInfo->getGpuProperty().numComputeUnitsPerShaderEngine = 10; +} +#endif + // Represents device infos. struct GpuNameStringMap { const char *gpuName; @@ -399,6 +413,9 @@ static const GpuNameStringMap GpuNameMap[] = { {"gfx1150", "Strix1", &setGfx1150Info}, // gfx1150 {"gfx115F", "Strix1 A0", &setGfx115FInfo}, // gfx115F #endif +#if LLPC_BUILD_STRIX_HALO + {"gfx1151", "Strix_halo", &setGfx1151Info}, // gfx1151 +#endif }; // ===================================================================================================================== diff --git a/lgc/test/BuiltIns/cs-deviceindex.lgc b/lgc/test/BuiltIns/cs-deviceindex.lgc index 6beeaef4aa..5be5f26453 100644 --- a/lgc/test/BuiltIns/cs-deviceindex.lgc +++ b/lgc/test/BuiltIns/cs-deviceindex.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/BuiltIns/cs-globalinvocationid.lgc b/lgc/test/BuiltIns/cs-globalinvocationid.lgc index 2831e5c6d9..8351d21073 100644 --- a/lgc/test/BuiltIns/cs-globalinvocationid.lgc +++ b/lgc/test/BuiltIns/cs-globalinvocationid.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=GFX10 %s ; RUN: lgc -mcpu=gfx1100 - < %s | FileCheck --check-prefixes=GFX11 %s diff --git a/lgc/test/BuiltIns/cs-localinvocationid.lgc b/lgc/test/BuiltIns/cs-localinvocationid.lgc index f3931dbcb4..52c9acfb6e 100644 --- a/lgc/test/BuiltIns/cs-localinvocationid.lgc +++ b/lgc/test/BuiltIns/cs-localinvocationid.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=GFX10 %s ; RUN: lgc -mcpu=gfx1100 - < %s | FileCheck --check-prefixes=GFX11 %s diff --git a/lgc/test/BuiltIns/cs-localinvocationindex.lgc b/lgc/test/BuiltIns/cs-localinvocationindex.lgc index 2d88ebbd55..31d3fc5e56 100644 --- a/lgc/test/BuiltIns/cs-localinvocationindex.lgc +++ b/lgc/test/BuiltIns/cs-localinvocationindex.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=GFX10 %s ; RUN: lgc -mcpu=gfx1100 - < %s | FileCheck --check-prefixes=GFX11 %s diff --git a/lgc/test/BuiltIns/cs-numsubgroups.lgc b/lgc/test/BuiltIns/cs-numsubgroups.lgc index 2395bfd755..288be48691 100644 --- a/lgc/test/BuiltIns/cs-numsubgroups.lgc +++ b/lgc/test/BuiltIns/cs-numsubgroups.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=WAVE64 %s ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=WAVE32 %s diff --git a/lgc/test/BuiltIns/cs-numworkgroups.lgc b/lgc/test/BuiltIns/cs-numworkgroups.lgc index d7d16e8a66..0697b7c8c4 100644 --- a/lgc/test/BuiltIns/cs-numworkgroups.lgc +++ b/lgc/test/BuiltIns/cs-numworkgroups.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --check-pal-metadata ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=CHECK %s @@ -53,8 +79,8 @@ attributes #0 = { nounwind } ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0 ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_cs_main -; CHECK-NEXT: .excp_en: 0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_cs_main +; CHECK: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 diff --git a/lgc/test/BuiltIns/cs-subgroupid.lgc b/lgc/test/BuiltIns/cs-subgroupid.lgc index 316fa2fc9a..cd71e45847 100644 --- a/lgc/test/BuiltIns/cs-subgroupid.lgc +++ b/lgc/test/BuiltIns/cs-subgroupid.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=WAVE32 %s ; RUN: lgc -mcpu=gfx1030 - < %s | FileCheck --check-prefixes=GFX1030 %s diff --git a/lgc/test/BuiltIns/cs-subgrouplocalinvocationid.lgc b/lgc/test/BuiltIns/cs-subgrouplocalinvocationid.lgc index 756a62ce63..cbe69ee739 100644 --- a/lgc/test/BuiltIns/cs-subgrouplocalinvocationid.lgc +++ b/lgc/test/BuiltIns/cs-subgrouplocalinvocationid.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=WAVE32 %s diff --git a/lgc/test/BuiltIns/cs-subgroupsize.lgc b/lgc/test/BuiltIns/cs-subgroupsize.lgc index e01e5964bd..7143761560 100644 --- a/lgc/test/BuiltIns/cs-subgroupsize.lgc +++ b/lgc/test/BuiltIns/cs-subgroupsize.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=WAVE32 %s diff --git a/lgc/test/BuiltIns/cs-workgroupid.lgc b/lgc/test/BuiltIns/cs-workgroupid.lgc index b917de05a2..efedc2a831 100644 --- a/lgc/test/BuiltIns/cs-workgroupid.lgc +++ b/lgc/test/BuiltIns/cs-workgroupid.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --check-pal-metadata ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=CHECK %s @@ -50,8 +76,8 @@ attributes #0 = { nounwind } ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0 ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_cs_main -; CHECK-NEXT: .excp_en: 0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_cs_main +; CHECK: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 diff --git a/lgc/test/BuiltIns/cs-workgroupsize.lgc b/lgc/test/BuiltIns/cs-workgroupsize.lgc index 247bd2223f..45682251cb 100644 --- a/lgc/test/BuiltIns/cs-workgroupsize.lgc +++ b/lgc/test/BuiltIns/cs-workgroupsize.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - < %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CMakeLists.txt b/lgc/test/CMakeLists.txt index 3669aa8019..dd9b151f4f 100644 --- a/lgc/test/CMakeLists.txt +++ b/lgc/test/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -33,6 +33,12 @@ set(LGC_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # required by configure_lit_site_cfg set(LLVM_LIT_OUTPUT_DIR ${LLVM_TOOLS_BINARY_DIR}) +# read LLVM_MAIN_REVISION from its header file +file(READ "${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake" LLVM_CONFIG_CONTENTS) +string(REGEX MATCH "#define LLVM_MAIN_REVISION ([0-9]+)" _UNUSED_VAR "${LLVM_CONFIG_CONTENTS}") +set(LLVM_MAIN_REVISION ${CMAKE_MATCH_1}) # may be empty +message("LLVM_MAIN_REVISION: ${LLVM_MAIN_REVISION}") + # When cross-compiling (or compiling a release tablegen), external llvm projects are defined but other projects are not. # We expect that tests are not run in a cmake configuration where llpc_version does not exist, so we only use its # definitions when it is available. diff --git a/lgc/test/CallLibFromCs-indirect.lgc b/lgc/test/CallLibFromCs-indirect.lgc index 23fd7d5a28..60fa7b801d 100644 --- a/lgc/test/CallLibFromCs-indirect.lgc +++ b/lgc/test/CallLibFromCs-indirect.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Call an extern compute library function from a compute shader. ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-mutate-entry-point -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CallLibFromCs.lgc b/lgc/test/CallLibFromCs.lgc index cc372f97c0..74dfb32de9 100644 --- a/lgc/test/CallLibFromCs.lgc +++ b/lgc/test/CallLibFromCs.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Call an extern compute library function from a compute shader. ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-mutate-entry-point -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CallLibFromCsPayload.lgc b/lgc/test/CallLibFromCsPayload.lgc index 1ef4c46bf4..0073d583c1 100644 --- a/lgc/test/CallLibFromCsPayload.lgc +++ b/lgc/test/CallLibFromCsPayload.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Call an extern compute library function from a compute shader. ; Ensure that the first argument uses the same registers as the return value of the called function. diff --git a/lgc/test/CleanUndefOutputValues.lgc b/lgc/test/CleanUndefOutputValues.lgc index bc54b21f83..31ee77a5b2 100644 --- a/lgc/test/CleanUndefOutputValues.lgc +++ b/lgc/test/CleanUndefOutputValues.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 ; Check the case that undef output value cannot be removed since the location may be re-written with valid value later. diff --git a/lgc/test/ComputeLibraryPushConstantSpill.lgc b/lgc/test/ComputeLibraryPushConstantSpill.lgc index 98861bb749..5185beb35a 100644 --- a/lgc/test/ComputeLibraryPushConstantSpill.lgc +++ b/lgc/test/ComputeLibraryPushConstantSpill.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test that push constant is correctly marked for spill in compute library even it is not used. ; RUN: lgc -mcpu=gfx1010 -o - - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CsBPermuteWave64.lgc b/lgc/test/CsBPermuteWave64.lgc index ed2e99d54e..8b562b08b5 100644 --- a/lgc/test/CsBPermuteWave64.lgc +++ b/lgc/test/CsBPermuteWave64.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - --mcpu=gfx1100 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s diff --git a/lgc/test/CsComputeLibrary.lgc b/lgc/test/CsComputeLibrary.lgc index fd957e0bc5..9566b03851 100644 --- a/lgc/test/CsComputeLibrary.lgc +++ b/lgc/test/CsComputeLibrary.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Define a compute library that can be called from a compute shader. ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-mutate-entry-point -print-after=lgc-prepare-pipeline-abi -print-after=lgc-set-up-target-features -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CsComputeLibraryPayload.lgc b/lgc/test/CsComputeLibraryPayload.lgc index f9441f5b17..c26373e3a1 100644 --- a/lgc/test/CsComputeLibraryPayload.lgc +++ b/lgc/test/CsComputeLibraryPayload.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 -o - - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CsLowerDebugPrintf.lgc b/lgc/test/CsLowerDebugPrintf.lgc index 386f866284..0906cc42a3 100644 --- a/lgc/test/CsLowerDebugPrintf.lgc +++ b/lgc/test/CsLowerDebugPrintf.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 -o - -passes=lgc-lower-debug-printf %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/CsReconfigWorkgroup.lgc b/lgc/test/CsReconfigWorkgroup.lgc index 5b1e79335e..811e16f30f 100644 --- a/lgc/test/CsReconfigWorkgroup.lgc +++ b/lgc/test/CsReconfigWorkgroup.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; ---------------------------------------------------------------------- ; Extract 1: Reconfiguring of workgroup size disabled diff --git a/lgc/test/ElfRelocationAndNote.lgc b/lgc/test/ElfRelocationAndNote.lgc index 58732f1adb..d84bc8d023 100644 --- a/lgc/test/ElfRelocationAndNote.lgc +++ b/lgc/test/ElfRelocationAndNote.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; This test case checks that no empty relocation section is generated when there ; is no relocation. It also checks the ISA name is correctly generated in the ; .note section. diff --git a/lgc/test/ElfRelocationSize.lgc b/lgc/test/ElfRelocationSize.lgc index 5af2d3213a..04ea0efa7f 100644 --- a/lgc/test/ElfRelocationSize.lgc +++ b/lgc/test/ElfRelocationSize.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; This test checks that no random extra bytes are generated after the relocations ; in the relocation section (`.rel.text`). To check that, we extract the offsets of ; the relocation section and the following section from the elf and subtract diff --git a/lgc/test/FDot2Gfx1010.lgc b/lgc/test/FDot2Gfx1010.lgc index 6d812bd602..16d3edc599 100644 --- a/lgc/test/FDot2Gfx1010.lgc +++ b/lgc/test/FDot2Gfx1010.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test fdot2 on gfx1010 because gfx1010 doesn't support llvm.amdgcn.fdot2 (v_dot2_f32_f16) ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/ImageSampleNoReturn.lgc b/lgc/test/ImageSampleNoReturn.lgc index f7c0a38f8c..0c040bc0a6 100644 --- a/lgc/test/ImageSampleNoReturn.lgc +++ b/lgc/test/ImageSampleNoReturn.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - --mcpu=gfx1100 --emit-llvm %s | FileCheck -check-prefixes=CHECK %s diff --git a/lgc/test/InOutPackingNonZeroBase.lgc b/lgc/test/InOutPackingNonZeroBase.lgc index 0026c00940..079359ca3e 100644 --- a/lgc/test/InOutPackingNonZeroBase.lgc +++ b/lgc/test/InOutPackingNonZeroBase.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 --print-after=lgc-collect-resource-usage --verify-ir %s -o=/dev/null 2>&1 | FileCheck --check-prefixes=IR %s ; Throw in 'cat' as a hack to prevent update_test_checks from touching the "MAPPING" lines diff --git a/lgc/test/IntToPtrWithAdd.lgc b/lgc/test/IntToPtrWithAdd.lgc index d8337a9226..194e65c1f0 100644 --- a/lgc/test/IntToPtrWithAdd.lgc +++ b/lgc/test/IntToPtrWithAdd.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Change inttoptr ( add x, const ) -> gep ( inttoptr x, const ) ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-peephole-optimization -o - 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/MulDx9Zero.lgc b/lgc/test/MulDx9Zero.lgc index 0e9e6b2bda..53d40f546c 100644 --- a/lgc/test/MulDx9Zero.lgc +++ b/lgc/test/MulDx9Zero.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1030 --emit-llvm -v -o=- - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/NggInPassthroughMode.lgc b/lgc/test/NggInPassthroughMode.lgc index 0903941e79..c7b0fa89e3 100644 --- a/lgc/test/NggInPassthroughMode.lgc +++ b/lgc/test/NggInPassthroughMode.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -march=amdgcn--amdpal -mcpu=gfx1100 -o - <%s | FileCheck %s ; Check that NGG passthrough mode is used for that shader. If a s_sendmsg diff --git a/lgc/test/PartPipeline.lgc b/lgc/test/PartPipeline.lgc index 7f727ffb87..d8f74dc8e3 100644 --- a/lgc/test/PartPipeline.lgc +++ b/lgc/test/PartPipeline.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -mcpu=gfx1030 -extract=2 -o %t.fs.elf %s ; RUN: lgc -mcpu=gfx1030 -extract=3 -other=%t.fs.elf -o %t.vs.elf %s ; RUN: lgc -mcpu=gfx1030 -extract=1 -l %s -o %t.pipe.elf %t.vs.elf %t.fs.elf diff --git a/lgc/test/PatchInvalidImageDescriptor.lgc b/lgc/test/PatchInvalidImageDescriptor.lgc index 562d1296f8..d167662772 100644 --- a/lgc/test/PatchInvalidImageDescriptor.lgc +++ b/lgc/test/PatchInvalidImageDescriptor.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test that invalid image descriptor patching is applied where required. ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-apply-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX1010 %s diff --git a/lgc/test/PeepholeOptPhiWithIdenticalLoad.lgc b/lgc/test/PeepholeOptPhiWithIdenticalLoad.lgc index fe469fddae..308ffdcce2 100644 --- a/lgc/test/PeepholeOptPhiWithIdenticalLoad.lgc +++ b/lgc/test/PeepholeOptPhiWithIdenticalLoad.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test that PHI with incoming value that may read from memory should not be optimized. ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-peephole-optimization -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/PhiWithArgument.lgc b/lgc/test/PhiWithArgument.lgc index 6ac0a326fc..f5caa2040a 100644 --- a/lgc/test/PhiWithArgument.lgc +++ b/lgc/test/PhiWithArgument.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 - <%s | FileCheck --check-prefixes=VS-ISA %s diff --git a/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc b/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc index e12d056662..ba4d3e26e4 100644 --- a/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc +++ b/lgc/test/ScalarizeInputWithDynamicIndexUser.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function lgc.shader.FS.main ; Check that if the generic input has an extract instruction user whose index is dynamic, the input should be scalarized for each component. ; RUN: lgc -mcpu=gfx1010 -stop-after=lgc-collect-resource-usage %s -o=- | FileCheck %s diff --git a/lgc/test/ShaderStages.lgc b/lgc/test/ShaderStages.lgc index e5d8e8de4c..3a99cd4d46 100644 --- a/lgc/test/ShaderStages.lgc +++ b/lgc/test/ShaderStages.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; ---------------------------------------------------------------------- ; Extract 1: CS diff --git a/lgc/test/SubgroupClusteredReduction.lgc b/lgc/test/SubgroupClusteredReduction.lgc index 6ccb6e960b..aa6a40f48f 100644 --- a/lgc/test/SubgroupClusteredReduction.lgc +++ b/lgc/test/SubgroupClusteredReduction.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - --mcpu=gfx1100 --emit-llvm %s | FileCheck -check-prefixes=CHECK %s diff --git a/lgc/test/TaskShaderEntryArgs.lgc b/lgc/test/TaskShaderEntryArgs.lgc index 47eeb896ed..e67405cf1e 100644 --- a/lgc/test/TaskShaderEntryArgs.lgc +++ b/lgc/test/TaskShaderEntryArgs.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test that the arguments of task shader entry-point are generated as expected. ; RUN: lgc -mcpu=gfx1030 --emit-llvm -o=- - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/TaskShaderOps.lgc b/lgc/test/TaskShaderOps.lgc index 2cd877c367..5b9c8a3175 100644 --- a/lgc/test/TaskShaderOps.lgc +++ b/lgc/test/TaskShaderOps.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Test that the operations of task shader are handled as expected. ; RUN: lgc -mcpu=gfx1030 --emit-llvm -v -o=- - <%s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/TaskShaderRegConfig.lgc b/lgc/test/TaskShaderRegConfig.lgc index 79071253f1..10e6a1fd68 100644 --- a/lgc/test/TaskShaderRegConfig.lgc +++ b/lgc/test/TaskShaderRegConfig.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --check-pal-metadata ; Test that relevant registers of task shader are built as expected. @@ -53,8 +79,8 @@ attributes #0 = { nounwind } ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0xbbc4ff6d ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_cs_main -; CHECK-NEXT: .excp_en: 0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_cs_main +; CHECK: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 diff --git a/lgc/test/TestWaterfallLoopForStruct.lgc b/lgc/test/TestWaterfallLoopForStruct.lgc index 8aa82da97b..da4c1680a0 100644 --- a/lgc/test/TestWaterfallLoopForStruct.lgc +++ b/lgc/test/TestWaterfallLoopForStruct.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/TextureRange.lgc b/lgc/test/TextureRange.lgc index 46893b37a5..e96e23d07d 100644 --- a/lgc/test/TextureRange.lgc +++ b/lgc/test/TextureRange.lgc @@ -1,4 +1,29 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc %s -print-after=lgc-lower-desc -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s ; CHECK: call <2 x i32> @lgc.load.user.data__v2i32(i32 24) diff --git a/lgc/test/Transforms/CombineCooperativeMatrix/constants.lgc b/lgc/test/Transforms/CombineCooperativeMatrix/constants.lgc index 95b65d3c05..e496a3086c 100644 --- a/lgc/test/Transforms/CombineCooperativeMatrix/constants.lgc +++ b/lgc/test/Transforms/CombineCooperativeMatrix/constants.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes='require,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=CHECK %s @@ -5,7 +31,7 @@ define <8 x float> @transpose_undef() { ; CHECK-LABEL: @transpose_undef( ; CHECK-NEXT: ret <8 x float> undef ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> undef, i32 1, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> undef, i32 1, i32 0) ret <8 x float> %r } @@ -13,7 +39,7 @@ define <8 x float> @transpose_poison() { ; CHECK-LABEL: @transpose_poison( ; CHECK-NEXT: ret <8 x float> poison ; - %r = call <8 x float> (...) 
@lgc.cooperative.matrix.transpose__v8f32(<8 x float> poison, i32 1, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> poison, i32 1, i32 0) ret <8 x float> %r } @@ -21,7 +47,7 @@ define <8 x float> @transpose_zero() { ; CHECK-LABEL: @transpose_zero( ; CHECK-NEXT: ret <8 x float> zeroinitializer ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> zeroinitializer, i32 1, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> zeroinitializer, i32 1, i32 0) ret <8 x float> %r } @@ -29,7 +55,7 @@ define <8 x float> @relayout_undef() { ; CHECK-LABEL: @relayout_undef( ; CHECK-NEXT: ret <8 x float> undef ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> undef, i32 1, i32 1, i32 0, i32 1) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> undef, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %r } @@ -37,7 +63,7 @@ define <8 x float> @relayout_poison() { ; CHECK-LABEL: @relayout_poison( ; CHECK-NEXT: ret <8 x float> poison ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> poison, i32 1, i32 1, i32 0, i32 1) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> poison, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %r } @@ -45,16 +71,16 @@ define <8 x float> @relayout_zero() { ; CHECK-LABEL: @relayout_zero( ; CHECK-NEXT: ret <8 x float> zeroinitializer ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> zeroinitializer, i32 1, i32 1, i32 0, i32 1) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> zeroinitializer, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %r } define <8 x float> @fptrunc_undef() { ; CHECK-LABEL: @fptrunc_undef( -; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 45, <8 x float> undef, i32 2, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 45, <8 x float> undef, i32 2, i32 1, i32 0, i32 0) ; CHECK-NEXT: ret <8 x float> [[R]] ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 45, <8 x float> undef, i32 2, i32 1, i32 0, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 45, <8 x float> undef, i32 2, i32 1, i32 0, i32 0) ret <8 x float> %r } @@ -62,7 +88,7 @@ define <8 x float> @fptrunc_poison() { ; CHECK-LABEL: @fptrunc_poison( ; CHECK-NEXT: ret <8 x float> poison ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 45, <8 x float> poison, i32 2, i32 1, i32 0, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 45, <8 x float> poison, i32 2, i32 1, i32 0, i32 0) ret <8 x float> %r } @@ -70,16 +96,16 @@ define <8 x float> @fptrunc_zero() { ; CHECK-LABEL: @fptrunc_zero( ; CHECK-NEXT: ret <8 x float> zeroinitializer ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 45, <8 x float> zeroinitializer, i32 2, i32 1, i32 0, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 45, <8 x float> zeroinitializer, i32 2, i32 1, i32 0, i32 0) ret <8 x float> %r } define <8 x float> @fpext_undef() { ; CHECK-LABEL: @fpext_undef( -; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 46, <8 x float> undef, i32 1, i32 2, i32 0, i32 0) +; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 46, <8 x float> undef, i32 1, i32 2, i32 0, i32 0) ; CHECK-NEXT: ret <8 x float> [[R]] ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 46, <8 x float> undef, i32 1, i32 2, i32 0, i32 0) + %r = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 46, <8 x float> undef, i32 1, i32 2, i32 0, i32 0) ret <8 x float> %r } @@ -87,7 +113,7 @@ define <8 x float> @fpext_poison() { ; CHECK-LABEL: @fpext_poison( ; CHECK-NEXT: ret <8 x float> poison ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 46, <8 x float> poison, i32 1, i32 2, i32 0, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 46, <8 x float> poison, i32 1, i32 2, i32 0, i32 0) ret <8 x float> %r } @@ -95,16 +121,16 @@ define <8 x float> @fpext_zero() { ; CHECK-LABEL: @fpext_zero( ; CHECK-NEXT: ret <8 x float> zeroinitializer ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 46, <8 x float> zeroinitializer, i32 1, i32 2, i32 0, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 46, <8 x float> zeroinitializer, i32 1, i32 2, i32 0, i32 0) ret <8 x float> %r } define <8 x i32> @trunc_undef() { ; CHECK-LABEL: @trunc_undef( -; CHECK-NEXT: [[R:%.*]] = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> undef, i32 5, i32 4, i32 0, i32 0) +; CHECK-NEXT: [[R:%.*]] = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> undef, i32 5, i32 4, i32 0, i32 0) ; CHECK-NEXT: ret <8 x i32> [[R]] ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> undef, i32 5, i32 4, i32 0, i32 0) + %r = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> undef, i32 5, i32 4, i32 0, i32 0) ret <8 x i32> %r } @@ -112,7 +138,7 @@ define <8 x i32> @trunc_poison() { ; CHECK-LABEL: @trunc_poison( ; CHECK-NEXT: ret <8 x i32> poison ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> poison, i32 5, i32 4, i32 0, i32 0) + %r = call <8 x i32> (...) 
@lgc.xdl.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> poison, i32 5, i32 4, i32 0, i32 0) ret <8 x i32> %r } @@ -120,16 +146,16 @@ define <8 x i32> @trunc_zero() { ; CHECK-LABEL: @trunc_zero( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> zeroinitializer, i32 5, i32 4, i32 0, i32 0) + %r = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 38, <8 x i32> zeroinitializer, i32 5, i32 4, i32 0, i32 0) ret <8 x i32> %r } define <8 x i32> @zext_undef() { ; CHECK-LABEL: @zext_undef( -; CHECK-NEXT: [[R:%.*]] = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> undef, i32 4, i32 5, i32 0, i32 0) +; CHECK-NEXT: [[R:%.*]] = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> undef, i32 4, i32 5, i32 0, i32 0) ; CHECK-NEXT: ret <8 x i32> [[R]] ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> undef, i32 4, i32 5, i32 0, i32 0) + %r = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> undef, i32 4, i32 5, i32 0, i32 0) ret <8 x i32> %r } @@ -137,7 +163,7 @@ define <8 x i32> @zext_poison() { ; CHECK-LABEL: @zext_poison( ; CHECK-NEXT: ret <8 x i32> poison ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> poison, i32 4, i32 5, i32 0, i32 0) + %r = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> poison, i32 4, i32 5, i32 0, i32 0) ret <8 x i32> %r } @@ -145,10 +171,10 @@ define <8 x i32> @zext_zero() { ; CHECK-LABEL: @zext_zero( ; CHECK-NEXT: ret <8 x i32> zeroinitializer ; - %r = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> zeroinitializer, i32 4, i32 5, i32 0, i32 0) + %r = call <8 x i32> (...) 
@lgc.xdl.cooperative.matrix.convert__v8i32(i32 39, <8 x i32> zeroinitializer, i32 4, i32 5, i32 0, i32 0) ret <8 x i32> %r } -declare <8 x float> @lgc.cooperative.matrix.transpose__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare <8 x i32> @lgc.cooperative.matrix.convert__v8i32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.transpose__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.convert__v8i32(...) diff --git a/lgc/test/Transforms/CombineCooperativeMatrix/matmul-loop.lgc b/lgc/test/Transforms/CombineCooperativeMatrix/matmul-loop.lgc index 17289c70a5..aaf74e70e9 100644 --- a/lgc/test/Transforms/CombineCooperativeMatrix/matmul-loop.lgc +++ b/lgc/test/Transforms/CombineCooperativeMatrix/matmul-loop.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc -o - -passes='require,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=CHECK %s @@ -5,21 +31,21 @@ define void @matmul_f16(ptr %ptr) { ; CHECK-LABEL: define void @matmul_f16 ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ACCUM_LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[ACCUM_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 0) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[ACCUM_PHI:%.*]] = phi <8 x float> [ [[ACCUM_LOAD]], [[ENTRY:%.*]] ], [ [[MULADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[A:%.*]] = call <8 x float> @getmat1() ; CHECK-NEXT: [[B:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; CHECK: end: -; CHECK-NEXT: call void (...) 
@lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADD]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADD]]) ; CHECK-NEXT: ret void ; entry: - %accum.load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0, i32 0) + %accum.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0, i32 0) br label %loop loop: @@ -28,15 +54,15 @@ loop: %a = call <8 x float> @getmat1() %b = call <8 x float> @getmat1() - %accum.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum.phi, i32 1, i32 1, i32 0, i32 1) - %muladd = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %accum.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) + %accum.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum.phi, i32 1, i32 1, i32 0, i32 1) + %muladd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum.next) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum.next) ret void } @@ -49,11 +75,11 @@ define void @matmul_f16_initzero(ptr %ptr) { ; CHECK-NEXT: [[ACCUM_PHI:%.*]] = phi <8 x float> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[MULADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[A:%.*]] = call <8 x float> @getmat1() ; CHECK-NEXT: [[B:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_PHI]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; CHECK: end: -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADD]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[MULADD]]) ; CHECK-NEXT: ret void ; entry: @@ -65,22 +91,22 @@ loop: %a = call <8 x float> @getmat1() %b = call <8 x float> @getmat1() - %accum.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum.phi, i32 1, i32 1, i32 0, i32 1) - %muladd = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %accum.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) + %accum.cvt = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum.phi, i32 1, i32 1, i32 0, i32 1) + %muladd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum.next) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum.next) ret void } declare i1 @getcc() declare <8 x float> @getmat1() -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare void @lgc.cooperative.matrix.store(...) -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) diff --git a/lgc/test/Transforms/CombineCooperativeMatrix/packed-accumulators.lgc b/lgc/test/Transforms/CombineCooperativeMatrix/packed-accumulators.lgc index b37fc39acb..eebb37f642 100644 --- a/lgc/test/Transforms/CombineCooperativeMatrix/packed-accumulators.lgc +++ b/lgc/test/Transforms/CombineCooperativeMatrix/packed-accumulators.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc --mcpu=gfx1100 -o - -passes='require,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=GFX11 %s @@ -5,20 +31,20 @@ define void @matmul_f16_pack_simple(ptr %out0, ptr %out1, <8 x float> %a, <8 x f ; GFX11-LABEL: define void @matmul_f16_pack_simple ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) ret void } @@ -26,24 +52,24 @@ define void @matmul_f16_pack_chain_sequential(ptr %out0, ptr %out1, <8 x float> ; GFX11-LABEL: define void @matmul_f16_pack_chain_sequential ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - call void (...) 
@lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -51,24 +77,24 @@ define void @matmul_f16_pack_chain_alternating(ptr %out0, ptr %out1, <8 x float> ; GFX11-LABEL: define void @matmul_f16_pack_chain_alternating ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -76,24 +102,24 @@ define void @matmul_f16_pack_chain_nested(ptr %out0, ptr %out1, <8 x float> %a, ; GFX11-LABEL: define void @matmul_f16_pack_chain_nested ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_2]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_2]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 false) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -101,15 +127,15 @@ define void @matmul_f16_no_packable_chain(ptr %out0, ptr %out1, <8 x float> %a, ; GFX11-LABEL: define void @matmul_f16_no_packable_chain ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_1]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.1) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.1) ret void } @@ -117,44 +143,44 @@ define void @matmul_f16_chain_loop(ptr %out0, ptr %out1, <8 x float> %a, <8 x fl ; GFX11-LABEL: define void @matmul_f16_chain_loop ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]]) ; GFX11-NEXT: br label [[LOOP:%.*]] ; GFX11: loop: ; GFX11-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[CHAIN1_2:%.*]], [[LOOP]] ] -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_2]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_2]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_2]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: [[CC:%.*]] = call i1 @getcc() ; GFX11-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) -; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP5]]) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) +; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_2]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP5]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %loop loop: %accum0.phi = phi <8 x float> [ %chain0.1, %entry ], [ %chain0.2, %loop ] %accum1.phi = phi <8 x float> [ %chain1.1, %entry ], [ %chain1.2, %loop ] - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.phi, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -162,28 +188,28 @@ define void @matmul_f16_chain_loop_phis(ptr %out0, ptr %out1, <8 x float> %a, <8 ; GFX11-LABEL: define void @matmul_f16_chain_loop_phis ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) -; GFX11-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[ACCUM0_LOAD]], <8 x float> [[ACCUM1_LOAD]]) +; GFX11-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX11-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[ACCUM0_LOAD]], <8 x float> [[ACCUM1_LOAD]]) ; GFX11-NEXT: br label [[HEADER:%.*]] ; GFX11: header: ; GFX11-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[MULADDHI:%.*]], [[LOOP:%.*]] ] ; GFX11-NEXT: [[CC:%.*]] = call i1 @getcc() ; GFX11-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; GFX11: loop: -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 false, i1 false, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 false, i1 false, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 false, i1 false, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 false, i1 false, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[HEADER]] ; GFX11: end: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[ACCUM1_PHI]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[ACCUM1_PHI]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[ACCUM1_PHI]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[ACCUM1_PHI]], i1 true) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %accum0.load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 - %accum1.load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 + %accum0.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 + %accum1.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) #0 br label %header header: @@ -193,18 +219,18 @@ header: br i1 %cc, label %loop, label %end loop: - %accum0.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) - %accum1.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) - %accum0.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) - %accum1.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) + %accum0.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) + %accum1.cvt = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 false, i1 false, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum0.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) + %accum1.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) br label %header end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) #2 - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) #2 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) #2 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) #2 ret void } @@ -212,49 +238,49 @@ define void @matmul_f16_chain_branch(ptr %out0, ptr %out1, <8 x float> %a, <8 x ; GFX11-LABEL: define void @matmul_f16_chain_branch ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: [[CC:%.*]] = call i1 @getcc() ; GFX11-NEXT: br i1 [[CC]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; GFX11: if_true: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[END:%.*]] ; GFX11: if_false: ; GFX11-NEXT: [[A_FALSE:%.*]] = call <8 x float> @getmat1() ; GFX11-NEXT: [[B_FALSE:%.*]] = call <8 x float> @getmat1() -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) -; GFX11-NEXT: [[CHAIN0_3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[TMP3]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[CHAIN1_3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[TMP4]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) +; GFX11-NEXT: [[CHAIN0_3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[TMP3]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[CHAIN1_3:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A_FALSE]], <8 x float> [[B_FALSE]], <8 x float> [[TMP4]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[END]] ; GFX11: end: ; GFX11-NEXT: [[ACCUM0_PHI:%.*]] = phi <8 x float> [ [[CHAIN0_2]], [[IF_TRUE]] ], [ [[CHAIN0_3]], [[IF_FALSE]] ] ; GFX11-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[CHAIN1_2]], [[IF_TRUE]] ], [ [[CHAIN1_3]], [[IF_FALSE]] ] -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM0_PHI]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM1_PHI]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM0_PHI]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[ACCUM1_PHI]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) %cc = call i1 @getcc() br i1 %cc, label %if_true, label %if_false if_true: - %chain0.2 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %end if_false: %a.false = call <8 x float> @getmat1() %b.false = call <8 x float> @getmat1() - %chain0.3 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.3 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.3 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.3 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a.false, <8 x float> %b.false, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %end @@ -262,8 +288,8 @@ end: %accum0.phi = phi <8 x float> [ %chain0.2, %if_true ], [ %chain0.3, %if_false ] %accum1.phi = phi <8 x float> [ %chain1.2, %if_true ], [ %chain1.3, %if_false ] - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) - call void (...) 
@lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.phi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.phi) ret void } @@ -271,36 +297,36 @@ define void @matmul_f16_chain_diff_bbs(ptr %out0, ptr %out1, <8 x float> %a, <8 ; GFX11-LABEL: define void @matmul_f16_chain_diff_bbs ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: [[CC:%.*]] = call i1 @getcc() ; GFX11-NEXT: br label [[CONT:%.*]] ; GFX11: cont: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 false) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) %cc = call i1 @getcc() br label %cont cont: - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -308,28 +334,28 @@ define void @matmul_f16_pack_loop(ptr %out0, ptr %out1) { ; GFX11-LABEL: define void @matmul_f16_pack_loop ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) -; GFX11-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[ACCUM0_LOAD]], <8 x float> [[ACCUM1_LOAD]]) +; GFX11-NEXT: [[ACCUM0_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT0]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX11-NEXT: [[ACCUM1_LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[OUT1]], i32 4, i1 false, i32 1, i32 1, i32 0, i32 16) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[ACCUM0_LOAD]], <8 x float> [[ACCUM1_LOAD]]) ; GFX11-NEXT: br label [[LOOP:%.*]] ; GFX11: loop: ; GFX11-NEXT: [[ACCUM1_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[MULADDHI:%.*]], [[LOOP]] ] ; GFX11-NEXT: [[A:%.*]] = call <8 x float> @getmat1() ; GFX11-NEXT: [[B:%.*]] = call <8 x float> @getmat1() -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM1_PHI]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: [[CC:%.*]] = call i1 @getcc() ; GFX11-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %accum0.load = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) - %accum1.load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) + %accum0.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out0, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) + %accum1.load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %out1, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16) br label %loop loop: @@ -339,19 +365,19 @@ loop: %a = call <8 x float> @getmat1() %b = call <8 x float> @getmat1() - %accum0.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) - %accum1.cvt = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %accum0.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) - %accum1.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) + %accum0.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum0.phi, i32 1, i32 1, i32 0, i32 1) + %accum1.cvt = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum1.phi, i32 1, i32 1, i32 0, i32 1) + %muladdLo = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum0.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum1.cvt, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum0.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo, i32 1, i32 1, i32 1, i32 0) + %accum1.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi, i32 1, i32 1, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.next) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.next) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum0.next) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %accum1.next) ret void } @@ -359,23 +385,23 @@ define void @matmul_f16_pack_scalar_same(ptr %out0, ptr %out1, <8 x float> %a, < ; GFX11-LABEL: define void @matmul_f16_pack_scalar_same ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> {{(splat \(half 0xH310F\))|()}}, i32 6, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> {{(splat \(half 0xH310F\))|()}}, i32 6, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) - %scaledHi = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -383,23 +409,23 @@ define void @matmul_f16_pack_scalar_different(ptr %out0, ptr %out1, <8 x float> ; GFX11-LABEL: define void @matmul_f16_pack_scalar_different ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> , i32 6, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> , i32 6, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP1]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) - %scaledHi = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) 
@lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -407,22 +433,22 @@ define void @matmul_f16_pack_scalar_only_lo(ptr %out0, ptr %out1, <8 x float> %a ; GFX11-LABEL: define void @matmul_f16_pack_scalar_only_lo ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) ret void } @@ -430,22 +456,22 @@ define void @matmul_f16_pack_scalar_only_hi(ptr %out0, ptr %out1, <8 x float> %a ; GFX11-LABEL: define void @matmul_f16_pack_scalar_only_hi ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH3100, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH3100, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP2]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %scaledHi = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH3100, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -453,39 +479,39 @@ define void @matmul_f16_pack_scalar_diff_bbs(ptr %out0, ptr %out1, <8 x float> % ; GFX11-LABEL: define void @matmul_f16_pack_scalar_diff_bbs ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[SCALE_LO:%.*]] ; GFX11: scale_lo: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) ; GFX11-NEXT: br label [[SCALE_HI:%.*]] ; GFX11: scale_hi: -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP2]], half 0xH310F, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP2]], half 0xH310F, i32 1, i32 1) ; GFX11-NEXT: br label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %scale_lo scale_lo: - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) br label %scale_hi scale_hi: - %scaledHi = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) br label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -493,36 +519,36 @@ define void @matmul_f16_pack_user_between_scalar(ptr %out0, ptr %out1, <8 x floa ; GFX11-LABEL: define void @matmul_f16_pack_user_between_scalar ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[SCALE:%.*]] ; GFX11: scale: -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) -; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) -; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP2]], half 0xH310F, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 false) +; GFX11-NEXT: [[SCALEDLO:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP1]], half 0xH310F, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI]], i1 true) +; GFX11-NEXT: [[SCALEDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[TMP2]], half 0xH310F, i32 1, i32 1) ; GFX11-NEXT: br label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALEDHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %scale scale: - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - %scaledHi = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half 0xH310F, i32 1, i32 1) br label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -530,36 +556,36 @@ define void @matmul_f16_pack_factor_between_scalar(ptr %in, ptr %out0, ptr %out1 ; GFX11-LABEL: define void @matmul_f16_pack_factor_between_scalar ; GFX11-SAME: (ptr [[IN:%.*]], ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) ; GFX11-NEXT: br label [[SCALE:%.*]] ; GFX11: scale: ; GFX11-NEXT: [[FACTORHI:%.*]] = load half, ptr [[IN]], align 2 ; GFX11-NEXT: [[TMP1:%.*]] = insertelement <2 x half> , half [[FACTORHI]], i32 1 -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> [[TMP1]], i32 6, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[MULADDHI]], <2 x half> [[TMP1]], i32 6, i32 1) ; GFX11-NEXT: br label [[END:%.*]] ; GFX11: end: -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP2]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP2]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP3]]) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[TMP2]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) br label %scale scale: - %scaledLo = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) + %scaledLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdLo, half 0xH310F, i32 1, i32 1) %factorHi = load half, ptr %in - %scaledHi = call <8 x float> (...) 
@lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half %factorHi, i32 1, i32 1) + %scaledHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %muladdHi, half %factorHi, i32 1, i32 1) br label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scaledHi) ret void } @@ -567,31 +593,31 @@ define void @matmul_f16_pack_binop_fadd(ptr %out0, ptr %out1, <8 x float> %a, <8 ; GFX11-LABEL: define void @matmul_f16_pack_binop_fadd ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) -; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) -; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) -; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) -; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP4]], <8 x float> [[TMP5]], i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) +; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) +; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) +; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) +; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP4]], <8 x float> [[TMP5]], i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo0 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi0 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdLo1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %binOpLo = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 - %binOpHi = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) ret void } @@ -599,31 +625,31 @@ define void @matmul_f16_pack_binop_incompatible_matrices(ptr %out0, ptr %out1, < ; GFX11-LABEL: define void @matmul_f16_pack_binop_incompatible_matrices ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) -; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) -; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) -; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) -; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP5]], <8 x float> [[TMP4]], i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) +; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) +; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) +; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) +; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP5]], <8 x float> [[TMP4]], i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo0 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi0 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdLo1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %binOpLo = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 - %binOpHi = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi0, i32 1, i32 1) #3 - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdHi0, <8 x float> %muladdHi0, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) ret void } @@ -631,31 +657,31 @@ define void @matmul_f16_pack_binop_incompatible_arithop(ptr %out0, ptr %out1, <8 ; GFX11-LABEL: define void @matmul_f16_pack_binop_incompatible_arithop ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]], <8 x float> [[C2:%.*]], <8 x float> [[C3:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) -; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) -; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) -; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) -; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 3, <8 x float> [[TMP4]], <8 x float> [[TMP5]], i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C2]], <8 x float> [[C3]]) +; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) +; GFX11-NEXT: [[BINOPLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[TMP2]], <8 x float> [[TMP3]], i32 1, i32 1) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) +; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) +; GFX11-NEXT: [[BINOPHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 3, <8 x float> [[TMP4]], <8 x float> [[TMP5]], i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOPHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo0 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi0 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdLo1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %binOpLo = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 - %binOpHi = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 3, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c3, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binOpLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo0, <8 x float> %muladdLo1, i32 1, i32 1) #3 + %binOpHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 3, <8 x float> %muladdHi0, <8 x float> %muladdHi1, i32 1, i32 1) #3 + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %binOpHi) ret void } @@ -663,31 +689,31 @@ define void @matmul_f16_unpack_before_convert(ptr %out0, ptr %out1, <8 x float> ; GFX11-LABEL: define void @matmul_f16_unpack_before_convert ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) -; GFX11-NEXT: [[CONVERTLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[TMP1]], i32 1, i32 1, i32 1, i32 0) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) -; GFX11-NEXT: [[CONVERTHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[TMP2]], i32 1, i32 1, i32 1, i32 0) -; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[CONVERTLO]], <8 x float> [[B]], <8 x float> [[TMP3]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[CONVERTHI]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) -; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP5]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[MULADDLO0]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 false) +; GFX11-NEXT: [[CONVERTLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[TMP1]], i32 1, i32 1, i32 1, i32 0) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI0]], i1 true) +; GFX11-NEXT: [[CONVERTHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[TMP2]], i32 1, i32 1, i32 1, i32 0) +; GFX11-NEXT: [[TMP3:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[MULADDLO1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[CONVERTLO]], <8 x float> [[B]], <8 x float> [[TMP3]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[CONVERTHI]], <8 x float> [[MULADDLO1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP4:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP4]]) +; GFX11-NEXT: [[TMP5:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[MULADDHI1]], i1 true) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP5]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo0 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi0 = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %convertLo = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo0, i32 1, i32 1, i32 1, i32 0) - %convertHi = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi0, i32 1, i32 1, i32 1, i32 0) - %muladdLo1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %convertLo, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %muladdHi1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %convertHi, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo1) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi1) + %muladdLo0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi0 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %convertLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdLo0, i32 1, i32 1, i32 1, i32 0) + %convertHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladdHi0, i32 1, i32 1, i32 1, i32 0) + %muladdLo1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %convertLo, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %muladdHi1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %convertHi, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi1) ret void } @@ -695,17 +721,17 @@ define void @matmul_f32_no_pack(ptr %out0, ptr %out1, <8 x float> %a, <8 x float ; GFX11-LABEL: define void @matmul_f32_no_pack ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDHI]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C1]], i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) +; GFX11-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> [[MULADDHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 2, i32 2, i32 2) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 2, i32 0, i32 0, <8 x float> %muladdHi) ret void } @@ -713,19 +739,19 @@ define void @matmul_f16_modified_accumulator(ptr %out0, ptr %out1, <8 x float> % ; GFX11-LABEL: define void @matmul_f16_modified_accumulator ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[ACCUM_C2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO]], <8 x float> [[C1]], i32 1, i32 1) -; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO]]) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI]]) +; GFX11-NEXT: [[MULADDLO:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[C0]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[ACCUM_C2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADDLO]], <8 x float> [[C1]], i32 1, i32 1) +; GFX11-NEXT: [[MULADDHI:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[ACCUM_C2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDLO]]) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[MULADDHI]]) ; GFX11-NEXT: ret void ; entry: - %muladdLo = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %accum.c2 = call <8 x float> (...) 
@lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo, <8 x float> %c1, i32 1, i32 1) - %muladdHi = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) + %muladdLo = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %accum.c2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladdLo, <8 x float> %c1, i32 1, i32 1) + %muladdHi = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %accum.c2, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdLo) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %muladdHi) ret void } @@ -733,24 +759,24 @@ define void @matmul_f16_store_between_muladds(ptr %out0, ptr %out1, <8 x float> ; GFX11-LABEL: define void @matmul_f16_store_between_muladds ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN0_2]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN1_1]], i1 true) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) 
@lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } @@ -758,33 +784,33 @@ define void @matmul_f16_store_within_chain(ptr %out0, ptr %out1, <8 x float> %a, ; GFX11-LABEL: define void @matmul_f16_store_within_chain ; GFX11-SAME: (ptr [[OUT0:%.*]], ptr [[OUT1:%.*]], <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x float> [[C0:%.*]], <8 x float> [[C1:%.*]]) { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) -; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) -; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 false) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) -; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 true) -; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; GFX11-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) +; GFX11-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> [[C0]], <8 x float> [[C1]]) +; GFX11-NEXT: [[CHAIN0_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP0]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN1_1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN0_1]], i1 true, i1 true, i1 true, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[CHAIN0_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[CHAIN1_1]], i1 true, i1 true, i1 false, i1 true, i32 1, i32 1, i32 1) +; GFX11-NEXT: [[TMP1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 false) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP1]]) +; GFX11-NEXT: [[TMP2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> [[CHAIN0_2]], i1 true) +; GFX11-NEXT: [[CHAIN1_2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[A]], <8 x float> [[B]], <8 x float> [[TMP2]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; GFX11-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[OUT1]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[CHAIN1_2]]) ; GFX11-NEXT: ret void ; entry: - %chain0.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain1.1 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %chain0.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) - %chain1.2 = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) + %chain0.1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain1.1 = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %chain0.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain0.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain0.2) + %chain1.2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %chain1.1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %out1, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %chain1.2) ret void } declare i1 @getcc() declare <8 x float> @getmat1() -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare void @lgc.cooperative.matrix.store(...) -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.times.scalar__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.binop__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.times.scalar__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.binop__v8f32(...) 
diff --git a/lgc/test/Transforms/CombineCooperativeMatrix/simple.lgc b/lgc/test/Transforms/CombineCooperativeMatrix/simple.lgc index c08bdd15c4..51b27b7afc 100644 --- a/lgc/test/Transforms/CombineCooperativeMatrix/simple.lgc +++ b/lgc/test/Transforms/CombineCooperativeMatrix/simple.lgc @@ -1,13 +1,39 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes='require,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=CHECK %s define <8 x float> @noop_transpose(<8 x float> %x) { ; CHECK-LABEL: @noop_transpose( -; CHECK-NEXT: [[T:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> [[X:%.*]], i32 1, i32 0) +; CHECK-NEXT: [[T:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> [[X:%.*]], i32 1, i32 0) ; CHECK-NEXT: ret <8 x float> [[T]] ; - %t = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) + %t = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) ret <8 x float> %t } @@ -16,37 +42,37 @@ define <8 x float> @collapse_transpose(<8 x float> %x) { ; CHECK-NEXT: ret <8 x float> [[X:%.*]] ; - %t1 = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) - %t2 = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %t1, i32 1, i32 0) + %t1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) + %t2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %t1, i32 1, i32 0) ret <8 x float> %t2 } define <8 x float> @test_load_transpose(ptr addrspace(3) %ptr) { ; CHECK-LABEL: @test_load_transpose( -; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 false, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 false, i32 1, i32 0, i32 0) ; CHECK-NEXT: ret <8 x float> [[A]] ; - %a = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0) - %t = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %a, i32 1, i32 0) + %a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0) + %t = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %a, i32 1, i32 0) ret <8 x float> %t } define void @test_store_transpose(ptr addrspace(3) %ptr, <8 x float> %a) { ; CHECK-LABEL: @test_store_transpose( -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> [[A:%.*]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> [[A:%.*]]) ; CHECK-NEXT: ret void ; - %t = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %a, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %t) + %t = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %a, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %t) ret void } define void @test_phi_transpose(ptr addrspace(7) %ptr, <8 x float> %init) { ; CHECK-LABEL: @test_phi_transpose( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> [[INIT:%.*]], i32 1, i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> [[INIT:%.*]], i32 1, i32 0) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[MATRIX:%.*]] = phi <8 x float> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP:%.*]], [[LOOP]] ] @@ -54,7 +80,7 @@ define void @test_phi_transpose(ptr addrspace(7) %ptr, <8 x float> %init) { ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] ; CHECK: end: -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP]]) ; CHECK-NEXT: ret void ; entry: @@ -62,15 +88,15 @@ entry: loop: %matrix = phi <8 x float> [ %init, %entry ], [ %matrix.new, %loop ] - %t1 = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %matrix, i32 1, i32 0) + %t1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %matrix, i32 1, i32 0) %tmp = call <8 x float> @process1(<8 x float> %t1) - %matrix.new = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %tmp, i32 1, i32 0) + %matrix.new = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %tmp, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> %matrix.new) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> %matrix.new) ret void } @@ -78,8 +104,8 @@ define <8 x float> @test_relayout_simple(<8 x float> %ab) { ; CHECK-LABEL: @test_relayout_simple( ; CHECK-NEXT: ret <8 x float> [[AB:%.*]] ; - %b = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %ab, i32 1, i32 1, i32 0, i32 1) - %c = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %b, i32 1, i32 1, i32 1, i32 0) + %b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %ab, i32 1, i32 1, i32 0, i32 1) + %c = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %b, i32 1, i32 1, i32 1, i32 0) ret <8 x float> %c } @@ -87,55 +113,55 @@ define <8 x float> @test_relayout_simple_reverse(<8 x float> %cd) { ; CHECK-LABEL: @test_relayout_simple_reverse( ; CHECK-NEXT: ret <8 x float> [[CD:%.*]] ; - %b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %cd, i32 1, i32 1, i32 1, i32 0) - %c = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %b, i32 1, i32 1, i32 0, i32 1) + %b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %cd, i32 1, i32 1, i32 1, i32 0) + %c = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %b, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %c } define <8 x float> @test_relayout_load(ptr addrspace(3) %ptr) { ; CHECK-LABEL: @test_relayout_load( -; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 1, i32 0) ; CHECK-NEXT: ret <8 x float> [[A]] ; - %a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0) - %b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 0, i32 1) + %a = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0) + %b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %b } define <8 x float> @test_relayout_load2(ptr addrspace(3) %ptr) { ; CHECK-LABEL: @test_relayout_load2( -; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[A:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0) ; CHECK-NEXT: ret <8 x float> [[A]] ; - %a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 1, i32 0) - %b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 1, i32 0) + %a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 1, i32 0) + %b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 1, i32 0) ret <8 x float> %b } define void @test_relayout_store(ptr addrspace(3) %ptr, <8 x float> %a) { ; CHECK-LABEL: @test_relayout_store( -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[A:%.*]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[A:%.*]]) ; CHECK-NEXT: ret void ; - %b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 0, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %b) + %b = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 0, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %b) ret void } define void @test_relayout_store2(ptr addrspace(3) %ptr, <8 x float> %a) { ; CHECK-LABEL: @test_relayout_store2( -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[A:%.*]]) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) [[PTR:%.*]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[A:%.*]]) ; CHECK-NEXT: ret void ; - %b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %b) + %b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %a, i32 1, i32 1, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %b) ret void } declare i1 @getcc() declare <8 x float> @process1(<8 x float>) -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.transpose__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare void @lgc.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.transpose__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) 
diff --git a/lgc/test/Transforms/CombineCooperativeMatrix/unhandled-inout.lgc b/lgc/test/Transforms/CombineCooperativeMatrix/unhandled-inout.lgc index 9af5b0dacd..9d5bf73dce 100644 --- a/lgc/test/Transforms/CombineCooperativeMatrix/unhandled-inout.lgc +++ b/lgc/test/Transforms/CombineCooperativeMatrix/unhandled-inout.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc -o - -passes='require,function(lgc-combine-cooperative-matrix)' %s | FileCheck --check-prefixes=CHECK %s @@ -10,16 +36,16 @@ define <8 x float> @insert_transpose(<8 x float> %x) { ; CHECK: loop: ; CHECK-NEXT: [[V_LOOP:%.*]] = phi <8 x float> [ [[X]], [[ENTRY:%.*]] ], [ [[MULADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[F:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END]] ; CHECK: end: ; CHECK-NEXT: [[R:%.*]] = phi <8 x float> [ [[MULADD]], [[LOOP]] ], [ [[X]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> [[R]], i32 1, i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> [[R]], i32 1, i32 0) ; CHECK-NEXT: ret <8 x float> [[TMP0]] ; entry: - %in.t = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) + %in.t = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) %guard = call i1 @getcc() br i1 %guard, label %loop, label %end @@ -27,9 +53,9 @@ loop: %v.loop = phi <8 x float> [ %in.t, %entry ], [ %v.next, %loop ] %f = call <8 x float> @getmat1() - %pre.t = call <8 x float> (...) 
@lgc.cooperative.matrix.transpose__v8f32(<8 x float> %v.loop, i32 1, i32 0) - %muladd = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre.t, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %v.next = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %muladd, i32 1, i32 0) + %pre.t = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %v.loop, i32 1, i32 0) + %muladd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre.t, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %v.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %muladd, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end @@ -42,13 +68,13 @@ end: define <8 x float> @reuse_transpose(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @reuse_transpose ; CHECK-SAME: (<8 x float> [[X:%.*]]) { -; CHECK-NEXT: [[T1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> [[X]], i32 1, i32 0) -; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[T1]], <8 x float> [[X]], <8 x float> zeroinitializer, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[T1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> [[X]], i32 1, i32 0) +; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[T1]], <8 x float> [[X]], <8 x float> zeroinitializer, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: ret <8 x float> [[R]] ; - %t1 = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) - %t2 = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %t1, i32 1, i32 0) - %r = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f32(<8 x float> %t1, <8 x float> %t2, <8 x float> zeroinitializer, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %t1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %x, i32 1, i32 0) + %t2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %t1, i32 1, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %t1, <8 x float> %t2, <8 x float> zeroinitializer, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ret <8 x float> %r } @@ -57,22 +83,22 @@ define <8 x float> @insert_convert(ptr %ptr) { ; CHECK-LABEL: define <8 x float> @insert_convert ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) ; CHECK-NEXT: [[GUARD:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[GUARD]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[V_LOOP:%.*]] = phi <8 x float> [ [[LOAD]], [[ENTRY:%.*]] ], [ [[MULADD:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[F:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[MULADD]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END]] ; CHECK: end: ; CHECK-NEXT: [[R:%.*]] = phi <8 x float> [ [[MULADD]], [[LOOP]] ], [ [[LOAD]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[R]], i32 1, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[R]], i32 1, i32 1, i32 1, i32 0) ; CHECK-NEXT: ret <8 x float> [[TMP0]] ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) %guard = call i1 @getcc() br i1 %guard, label %loop, label %end @@ -80,9 +106,9 @@ loop: %v.loop = phi <8 x float> [ %load, %entry ], [ %v.next, %loop ] %f = call <8 x float> @getmat1() - %pre = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 0, i32 1) - %muladd = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %v.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) + %pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 0, i32 1) + %muladd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %v.next = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %muladd, i32 1, i32 1, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end @@ -95,13 +121,13 @@ end: define <8 x float> @reuse_convert(<8 x float> %x) { ; CHECK-LABEL: define <8 x float> @reuse_convert ; CHECK-SAME: (<8 x float> [[X:%.*]]) { -; CHECK-NEXT: [[CVT1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[X]], i32 1, i32 1, i32 0, i32 1) -; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[X]], <8 x float> [[X]], <8 x float> [[CVT1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[CVT1:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> [[X]], i32 1, i32 1, i32 0, i32 1) +; CHECK-NEXT: [[R:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[X]], <8 x float> [[X]], <8 x float> [[CVT1]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ; CHECK-NEXT: ret <8 x float> [[R]] ; - %cvt1 = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %x, i32 1, i32 1, i32 0, i32 1) - %cvt2 = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %cvt1, i32 1, i32 1, i32 1, i32 0) - %r = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %cvt2, <8 x float> %cvt2, <8 x float> %cvt1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %cvt1 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %x, i32 1, i32 1, i32 0, i32 1) + %cvt2 = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %cvt1, i32 1, i32 1, i32 1, i32 0) + %r = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %cvt2, <8 x float> %cvt2, <8 x float> %cvt1, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) ret <8 x float> %r } @@ -109,20 +135,20 @@ define void @convert_to_acc_inner_binop(ptr %ptr0, ptr %ptr1) { ; CHECK-LABEL: define void @convert_to_acc_inner_binop ; CHECK-SAME: (ptr [[PTR0:%.*]], ptr [[PTR1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD_A:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR0]], i32 4, i1 false, i32 1, i32 1, i32 0) -; CHECK-NEXT: [[LOAD_B:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR1]], i32 4, i1 false, i32 1, i32 1, i32 0) -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD_A]], <8 x float> [[LOAD_B]], i32 1, i32 1) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[BINOP]]) +; CHECK-NEXT: [[LOAD_A:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR0]], i32 4, i1 false, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[LOAD_B:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR1]], i32 4, i1 false, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD_A]], <8 x float> [[LOAD_B]], i32 1, i32 1) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR0]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[BINOP]]) ; CHECK-NEXT: ret void ; entry: - %load.a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr0, i32 4, i1 false, i32 1, i32 0, i32 0) - %load.b = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr1, i32 4, i1 false, i32 1, i32 0, i32 0) - %conv.a = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.a, i32 1, i32 1, i32 0, i32 1) - %conv.b = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.b, i32 1, i32 1, i32 0, i32 1) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %conv.a, <8 x float> %conv.b, i32 1, i32 1) - %conv.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %binop, i32 1, i32 1, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %conv.post) + %load.a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr0, i32 4, i1 false, i32 1, i32 0, i32 0) + %load.b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr1, i32 4, i1 false, i32 1, i32 0, i32 0) + %conv.a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.a, i32 1, i32 1, i32 0, i32 1) + %conv.b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.b, i32 1, i32 1, i32 0, i32 1) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %conv.a, <8 x float> %conv.b, i32 1, i32 1) + %conv.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %binop, i32 1, i32 1, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %conv.post) ret void } @@ -130,17 +156,17 @@ define void @convert_to_acc_inner_times_scalar(ptr %ptr) { ; CHECK-LABEL: define void @convert_to_acc_inner_times_scalar ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0) -; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 0) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) - %conv.pre = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load, i32 1, i32 1, i32 0, i32 1) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %conv.pre, half 0xH310F, i32 1, i32 1) - %conv.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %conv.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %conv.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load, i32 1, i32 1, i32 0, i32 1) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %conv.pre, half 0xH310F, i32 1, i32 1) + %conv.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %conv.post) ret void } @@ -148,20 +174,20 @@ define void @convert_to_fact_inner_binop(ptr %ptr0, ptr %ptr1) { ; CHECK-LABEL: define void @convert_to_fact_inner_binop ; CHECK-SAME: (ptr [[PTR0:%.*]], ptr [[PTR1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD_A:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR0]], i32 4, i1 false, i32 1, i32 0, i32 0) -; CHECK-NEXT: [[LOAD_B:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR1]], i32 4, i1 false, i32 1, i32 0, i32 0) -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD_A]], <8 x float> [[LOAD_B]], i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOP]]) +; CHECK-NEXT: [[LOAD_A:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR0]], i32 4, i1 false, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[LOAD_B:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR1]], i32 4, i1 false, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD_A]], <8 x float> [[LOAD_B]], i32 1, i32 0) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR0]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[BINOP]]) ; CHECK-NEXT: ret void ; entry: - %load.a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr0, i32 4, i1 false, i32 1, i32 1, i32 0) - %load.b = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr1, i32 4, i1 false, i32 1, i32 1, i32 0) - %conv.a = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.a, i32 1, i32 1, i32 1, i32 0) - %conv.b = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.b, i32 1, i32 1, i32 1, i32 0) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %conv.a, <8 x float> %conv.b, i32 1, i32 0) - %conv.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %binop, i32 1, i32 1, i32 0, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr0, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %conv.post) + %load.a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr0, i32 4, i1 false, i32 1, i32 1, i32 0) + %load.b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr1, i32 4, i1 false, i32 1, i32 1, i32 0) + %conv.a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.a, i32 1, i32 1, i32 1, i32 0) + %conv.b = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load.b, i32 1, i32 1, i32 1, i32 0) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %conv.a, <8 x float> %conv.b, i32 1, i32 0) + %conv.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %binop, i32 1, i32 1, i32 0, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr0, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %conv.post) ret void } @@ -169,17 +195,17 @@ define void @convert_to_fact_inner_times_scalar(ptr %ptr) { ; CHECK-LABEL: define void @convert_to_fact_inner_times_scalar ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) -; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 1) -; CHECK-NEXT: call void (...) 
@lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[SCALAR]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 1) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[SCALAR]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) - %conv.pre = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load, i32 1, i32 1, i32 1, i32 0) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %conv.pre, half 0xH310F, i32 1, i32 0) - %conv.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 0, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %conv.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) + %conv.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %load, i32 1, i32 1, i32 1, i32 0) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %conv.pre, half 0xH310F, i32 1, i32 0) + %conv.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 0, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %conv.post) ret void } @@ -187,25 +213,25 @@ define void @convert_to_acc_inner_chain(ptr %ptr) { ; CHECK-LABEL: define void @convert_to_acc_inner_chain ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 1, i32 0) ; CHECK-NEXT: [[GUARD:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[GUARD]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[V_LOOP:%.*]] = phi <8 x float> [ [[LOAD]], [[ENTRY:%.*]] ], [ [[SCALAR:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[F:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[MULADD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADD]], <8 x float> [[MULADD]], i32 1, i32 1) -; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 1) +; CHECK-NEXT: [[MULADD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> [[F]], <8 x float> [[F]], <8 x float> [[V_LOOP]], i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[MULADD]], <8 x float> [[MULADD]], i32 1, i32 1) +; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 1) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PHI_END:%.*]] = phi <8 x float> [ [[SCALAR]], [[LOOP]] ], [ [[LOAD]], [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 1) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[SCALAR_END]]) +; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 1) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[SCALAR_END]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) %guard = call i1 @getcc() br i1 %guard, label %loop, label %end @@ -213,19 +239,19 @@ loop: %v.loop = phi <8 x float> [ %load, %entry ], [ %v.next, %loop ] %f = call <8 x float> @getmat1() - %pre = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 0, i32 1) - %muladd = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladd, <8 x float> %muladd, i32 1, i32 1) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 1) - %v.next = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) + %pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 0, i32 1) + %muladd = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x float> %f, <8 x float> %f, <8 x float> %pre, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 1) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %muladd, <8 x float> %muladd, i32 1, i32 1) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 1) + %v.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: %phi.end = phi <8 x float> [ %v.next, %loop ], [ %load, %entry ] - %scalar.end = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scalar.end) + %scalar.end = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scalar.end) ret void } @@ -233,24 +259,24 @@ define void @convert_to_fact_inner_chain(ptr %ptr) { ; CHECK-LABEL: define void @convert_to_fact_inner_chain ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0) ; CHECK-NEXT: [[GUARD:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[GUARD]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[V_LOOP:%.*]] = phi <8 x float> [ [[LOAD]], [[ENTRY:%.*]] ], [ [[SCALAR:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[F:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[V_LOOP]], <8 x float> [[V_LOOP]], i32 1, i32 0) -; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[V_LOOP]], <8 x float> [[V_LOOP]], i32 1, i32 0) +; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 0) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PHI_END:%.*]] = phi <8 x float> [ [[SCALAR]], [[LOOP]] ], [ [[LOAD]], [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR_END]]) +; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 0) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR_END]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) %guard = call i1 @getcc() br i1 %guard, label %loop, label %end @@ -258,18 +284,18 @@ loop: %v.loop = phi <8 x float> [ %load, %entry ], [ %v.next, %loop ] %f = call <8 x float> @getmat1() - %pre = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 1, i32 0) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %pre, <8 x float> %pre, i32 1, i32 0) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 0) - %v.next = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 0, i32 1) + %pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %v.loop, i32 1, i32 1, i32 1, i32 0) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %pre, <8 x float> %pre, i32 1, i32 0) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 0) + %v.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 0, i32 1) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: %phi.end = phi <8 x float> [ %v.next, %loop ], [ %load, %entry ] - %scalar.end = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %scalar.end) + %scalar.end = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %scalar.end) ret void } @@ -277,17 +303,17 @@ define void @transpose_fact_inner_binop(ptr %ptr) { ; CHECK-LABEL: define void @transpose_fact_inner_binop ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD]], <8 x float> [[LOAD]], i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[LOAD]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD]], <8 x float> [[LOAD]], i32 1, i32 0) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[LOAD]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) - %trans.pre = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 0) - %trans.post = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %trans.pre = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 0) + %trans.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) ret void } @@ -295,17 +321,17 @@ define void @transpose_acc_inner_binop(ptr %ptr) { ; CHECK-LABEL: define void @transpose_acc_inner_binop ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0) -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD]], <8 x float> [[LOAD]], i32 1, i32 1) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[LOAD]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[LOAD]], <8 x float> [[LOAD]], i32 1, i32 1) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> [[LOAD]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) - %trans.pre = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 1) - %trans.post = call <8 x float> (...) 
@lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %trans.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) + %trans.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 1) + %trans.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 1, i32 0, i32 16, <8 x float> %trans.post) ret void } @@ -313,17 +339,17 @@ define void @transpose_fact_inner_times_scalar(ptr %ptr) { ; CHECK-LABEL: define void @transpose_fact_inner_times_scalar ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) -; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 0) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) - %trans.pre = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %trans.pre, half 0xH310F, i32 1, i32 0) - %trans.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %trans.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 0) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %trans.pre, half 0xH310F, i32 1, i32 0) + %trans.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) ret void } @@ -331,17 +357,17 @@ define void @transpose_acc_inner_times_scalar(ptr %ptr) { ; CHECK-LABEL: define void @transpose_acc_inner_times_scalar ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0) -; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 1) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 1, i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[LOAD]], half 0xH310F, i32 1, i32 1) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) - %trans.pre = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %trans.pre, half 0xH310F, i32 1, i32 1) - %trans.post = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 1, i32 0) + %trans.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %load, i32 1, i32 1) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %trans.pre, half 0xH310F, i32 1, i32 1) + %trans.post = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %scalar, i32 1, i32 1, i32 1, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %trans.post) ret void } @@ -349,24 +375,24 @@ define void @transpose_inner_chain(ptr %ptr) { ; CHECK-LABEL: define void @transpose_inner_chain ; CHECK-SAME: (ptr [[PTR:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr [[PTR]], i32 4, i1 true, i32 1, i32 0, i32 0) ; CHECK-NEXT: [[GUARD:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[GUARD]], label [[LOOP:%.*]], label [[END:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[V_LOOP:%.*]] = phi <8 x float> [ [[LOAD]], [[ENTRY:%.*]] ], [ [[SCALAR:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[F:%.*]] = call <8 x float> @getmat1() -; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[V_LOOP]], <8 x float> [[V_LOOP]], i32 1, i32 0) -; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 0) +; CHECK-NEXT: [[BINOP:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> [[V_LOOP]], <8 x float> [[V_LOOP]], i32 1, i32 0) +; CHECK-NEXT: [[SCALAR]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[BINOP]], half 0xH310F, i32 1, i32 0) ; CHECK-NEXT: [[CC:%.*]] = call i1 @getcc() ; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END]] ; CHECK: end: ; CHECK-NEXT: [[PHI_END:%.*]] = phi <8 x float> [ [[SCALAR]], [[LOOP]] ], [ [[LOAD]], [[ENTRY]] ] -; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR_END]]) +; CHECK-NEXT: [[SCALAR_END:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> [[PHI_END]], half 0xH312F, i32 1, i32 0) +; CHECK-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr [[PTR]], i32 4, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> [[SCALAR_END]]) ; CHECK-NEXT: ret void ; entry: - %load = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) + %load = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr %ptr, i32 4, i1 false, i32 1, i32 0, i32 0) %guard = call i1 @getcc() br i1 %guard, label %loop, label %end @@ -374,28 +400,28 @@ loop: %v.loop = phi <8 x float> [ %load, %entry ], [ %v.next, %loop ] %f = call <8 x float> @getmat1() - %trans.pre = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %v.loop, i32 1, i32 0) - %binop = call <8 x float> (...) @lgc.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 0) - %scalar = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 0) - %v.next = call <8 x float> (...) @lgc.cooperative.matrix.transpose__v8f32(<8 x float> %scalar, i32 1, i32 0) + %trans.pre = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %v.loop, i32 1, i32 0) + %binop = call <8 x float> (...) @lgc.xdl.cooperative.matrix.binop__v8f32(i32 1, <8 x float> %trans.pre, <8 x float> %trans.pre, i32 1, i32 0) + %scalar = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %binop, half 0xH310F, i32 1, i32 0) + %v.next = call <8 x float> (...) @lgc.xdl.cooperative.matrix.transpose__v8f32(<8 x float> %scalar, i32 1, i32 0) %cc = call i1 @getcc() br i1 %cc, label %loop, label %end end: %phi.end = phi <8 x float> [ %v.next, %loop ], [ %load, %entry ] - %scalar.end = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 0) - call void (...) 
@lgc.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scalar.end) + %scalar.end = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %phi.end, half 0xH312F, i32 1, i32 0) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr %ptr, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %scalar.end) ret void } declare i1 @getcc() declare <8 x float> @getmat1() -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.transpose__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.times.scalar__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.binop__v8f32(...) -declare void @lgc.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.transpose__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.times.scalar__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.binop__v8f32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) diff --git a/lgc/test/Transforms/CpsLowering/bad-max-argument-vgprs.lgc b/lgc/test/Transforms/CpsLowering/bad-max-argument-vgprs.lgc new file mode 100644 index 0000000000..4a1cc395bf --- /dev/null +++ b/lgc/test/Transforms/CpsLowering/bad-max-argument-vgprs.lgc @@ -0,0 +1,70 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; RUN: not --crash lgc -mcpu=gfx1030 -o - -lgc-use-init-whole-wave /dev/null -passes="require,lgc-mutate-entry-point" %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 + +; CHECK: Invalid number of inactive VGPRs, check lgc.cps.maxArgumentVgprs + +declare void @lgc.cps.jump(...) 
#0 + +define void @test(i32 %cspInit, i32 %arg, ptr %table, i32 %rcr) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { +entry: + %csp = alloca i32, align 4 + %local = alloca i32, align 4, addrspace(5) + store i32 %cspInit, ptr %csp, align 4 + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + %0 = load i32, ptr %csp, align 4 + store i32 %then.arg, ptr addrspace(5) %local + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, i32 %0, i32 %rcr, i32 %then.arg) + unreachable +} + +declare !continuation !3 { ptr, ptr } @continuation.prototype.test(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #1 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #1 + +attributes #0 = { noreturn } +attributes #1 = { nounwind } + +!continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!5} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{ptr @test} +!4 = !{i32 0} +!5 = !{i32 1} diff --git a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc index 172d8d82f2..b9fcc82dcc 100644 --- a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc +++ b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc @@ -1,97 +1,146 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature -; RUN: lgc -mcpu=gfx1030 -o - -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s +; RUN: lgc -mcpu=gfx1030 -o - -lgc-use-init-whole-wave -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 declare void @lgc.cps.jump(...) 
#0 define void @test(i32 %cspInit, i32 %arg, ptr %table, i32 %rcr) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[RCR:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META4:![0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.state [[META7:![0-9]+]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[RCR:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] !continuation.state [[META8:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 
@llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TABLE]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 10: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 ; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 ; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: store i32 [[THEN_ARG]], ptr addrspace(5) [[LOCAL]], align 4 +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP8]], i32 [[CR_THEN]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP9]], i32 [[TMP7]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP10]], i32 poison, 3 -; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP11]], i32 [[RCR]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP12]], i32 
[[THEN_ARG]], 5 -; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP13]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP14]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7 -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP17]]) -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP16]], 3 -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP19]]) -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP16]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP23]]) -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP16]], 1 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP26]] -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP30]], i1 true) -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP15]], i32 [[TMP31]]) -; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP15]], [[TMP32]] -; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP33]]) -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP32]]) -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP34]]) -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP35]], -64 -; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP37]], i64 0 -; CHECK-NEXT: [[TMP39:%.*]] = bitcast <2 x i32> [[TMP38]] to i64 -; CHECK-NEXT: [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr -; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr 
addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP42:%.*]] = bitcast i64 [[TMP41]] to <2 x i32> -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i32> [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP42]], i64 1 -; CHECK-NEXT: [[TMP45:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP48:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <20 x i32> [[TMP48]], i32 [[TMP43]], i64 1 -; CHECK-NEXT: [[TMP50:%.*]] = insertelement <20 x i32> [[TMP49]], i32 [[TMP44]], i64 2 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> [[TMP50]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[TMP45]], i64 16 -; 
CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[TMP46]], i64 17 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[TMP47]], i64 18 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32s(ptr inreg [[TMP40]], i32 inreg [[TMP36]], <20 x i32> inreg [[TMP67]], { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP13]], i32 0) +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_THEN]], [[TMP10]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[CSPINIT]], [[ENTRY]] ], [ [[TMP11]], [[TMP10]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP7]], [[ENTRY]] ], [ [[RCR]], [[TMP10]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ [[THEN_ARG]], [[TMP10]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[RCR]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY]] ], [ poison, [[TMP10]] ] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i64 [[TMP21]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP23]], i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP24]], i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, 
i32, i32, i32, i32 } [[TMP25]], i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP26]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP27]], i32 [[TMP16]], 5 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP28]], i32 [[TMP17]], 6 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP29]], i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP30]], i32 [[TMP19]], 8 +; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP31]], i32 [[TMP20]], 9 +; CHECK-NEXT: [[TMP33:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP32]], 1 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP34]], 3 +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP34]], 2 +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP34]], 1 +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = call i32 
@llvm.cttz.i32(i32 [[TMP48]], i1 true) +; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP33]], i32 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP33]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP51]]) +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP50]], -64 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP53]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i32> [[TMP54]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = bitcast i64 [[TMP57]] to <2 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <2 x i32> [[TMP58]], i64 0 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x i32> [[TMP58]], i64 1 +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[TMP59]], i64 1 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[TMP60]], i64 2 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement 
<20 x i32> [[TMP73]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[TMP61]], i64 16 +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[TMP62]], i64 17 +; CHECK-NEXT: [[TMP82:%.*]] = insertelement <20 x i32> [[TMP81]], i32 [[TMP63]], i64 18 +; CHECK-NEXT: [[TMP83:%.*]] = insertelement <20 x i32> [[TMP82]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP56]], i32 inreg [[TMP52]], <20 x i32> inreg [[TMP83]], { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP32]], i32 0) ; CHECK-NEXT: unreachable ; entry: %csp = alloca i32, align 4 + %local = alloca i32, align 4, addrspace(5) store i32 %cspInit, ptr %csp, align 4 %table.0 = getelementptr i32, ptr %table, i32 0 %cr.then = load i32, ptr %table.0, align 4 %then.arg = add i32 %arg, 1 %0 = load i32, ptr %csp, align 4 + store i32 %then.arg, ptr addrspace(5) %local call void (...) 
@lgc.cps.jump(i32 %cr.then, i32 2, i32 %0, i32 poison, i32 %rcr, i32 %then.arg) unreachable } @@ -110,9 +159,12 @@ attributes #0 = { noreturn } attributes #1 = { nounwind } !continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!5} !0 = !{i32 5} !1 = !{i32 1} !2 = !{i32 7} !3 = !{ptr @test} !4 = !{i32 0} +!5 = !{i32 8} +; diff --git a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc index 3e64a3ac94..7c1761eb43 100644 --- a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature ; RUN: lgc -mcpu=gfx1030 -o - -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s @@ -10,7 +36,7 @@ declare ptr addrspace(32) @lgc.cps.get.vsp() #2 define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc.shaderstage !3 !lgc.rt.shaderstage !3 { ; CHECK-LABEL: define {{[^@]+}}@lgc.shader.CS.main ; CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[PAD4:%.*]], i32 inreg noundef [[PAD5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) -; CHECK-SAME: #[[ATTR3:[0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META5]] { +; CHECK-SAME: #[[ATTR3:[0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !lgc.rt.shaderstage [[META6]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -18,13 +44,11 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 
@llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[USERDATA0]], i64 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[USERDATA1]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[USERDATA2]], i64 2 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[USERDATA3]], i64 3 -; CHECK-NEXT: [[PTR:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP10]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[USERDATA0]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[USERDATA1]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[USERDATA2]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[USERDATA3]], i64 3 +; CHECK-NEXT: [[PTR:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP8]], i1 false) ; CHECK-NEXT: [[P0:%.*]] = getelementptr i32, ptr addrspace(7) [[PTR]], i32 0 ; CHECK-NEXT: [[I_VSP:%.*]] = load i32, ptr addrspace(7) [[P0]], align 4 ; CHECK-NEXT: store i32 [[I_VSP]], ptr [[CSP]], align 4 @@ -32,14 +56,16 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc ; CHECK-NEXT: [[CR:%.*]] = load i32, ptr addrspace(7) [[P1]], align 4 ; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr addrspace(7) [[PTR]], i32 2 ; CHECK-NEXT: [[ARG:%.*]] = load i32, ptr addrspace(7) [[P2]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] ; CHECK: tail.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[TMP10]] to <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { i32, i32, i32, i32, i32 } poison, i32 [[CR]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i32, i32, 
i32, i32, i32 } [[TMP12]], i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP12]], i32 [[TMP9]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP13]], i32 poison, 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP14]], i32 [[ARG]], 3 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP15]], i32 [[TMP11]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { i32, i32, i32, i32, i32 } [[TMP15]], i32 [[TMP9]], 4 ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { i32, i32, i32, i32, i32 } [[TMP16]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP18]]) @@ -48,7 +74,7 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP17]], [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP22]]) ; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP21]], -64 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP24]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP11]], i32 [[TMP24]], i64 0 ; CHECK-NEXT: [[TMP26:%.*]] = bitcast <2 x i32> [[TMP25]] to i64 ; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr ; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 diff --git a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc index 1f64393281..27ae69d708 100644 --- a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc @@ -1,5 +1,33 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature -; RUN: lgc -mcpu=gfx1030 -o - -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s +; RUN: lgc -mcpu=gfx1030 -o - -lgc-use-init-whole-wave -passes="require,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 declare void @lgc.cps.jump(...) 
#0 @@ -19,7 +47,7 @@ declare void @lgc.cps.complete() define void @test.0(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test.0 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META4:![0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.state [[META7:![0-9]+]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] !continuation.state [[META8:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -27,8 +55,9 @@ define void @test.0(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> 
[[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 @@ -45,70 +74,75 @@ define void @test.0(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP15]], i32 0 ; CHECK-NEXT: store i8 99, ptr addrspace(5) [[TMP16]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP18]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP19]], i32 [[TMP17]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP20]], i32 poison, 3 -; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP21]], i32 poison, 4 -; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP22]], i32 [[TMP14]], 5 -; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP23]], i32 [[TMP11]], 6 -; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP24]], 1 -; CHECK-NEXT: 
[[TMP26:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP25]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7 -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP27]], 3 -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP27]], 2 -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 [[TMP35]], i32 [[TMP33]] -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP27]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP38]]) -; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[TMP39]], i32 [[TMP37]] -; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP41]], i1 true) -; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP26]], i32 [[TMP42]]) -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP26]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP43]]) -; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP45]]) -; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP46]], -64 -; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP48]], i64 0 -; CHECK-NEXT: [[TMP50:%.*]] = bitcast <2 x i32> [[TMP49]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP50]] to ptr -; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP53:%.*]] = bitcast i64 [[TMP52]] to <2 x i32> -; CHECK-NEXT: 
[[TMP54:%.*]] = extractelement <2 x i32> [[TMP53]], i64 0 -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i32> [[TMP53]], i64 1 -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP57:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[TMP54]], i64 1 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[TMP55]], i64 2 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP56]], i64 16 -; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP57]], i64 17 -; CHECK-NEXT: [[TMP77:%.*]] = 
insertelement <20 x i32> [[TMP76]], i32 [[TMP58]], i64 18 -; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP51]], i32 inreg [[TMP47]], <20 x i32> inreg [[TMP78]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP24]], i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ add (i32 ptrtoint (ptr @test.1 to i32), i32 1), [[TMP6]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ [[TMP17]], [[TMP6]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP14]], [[TMP6]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP11]], [[TMP6]] ] +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP25:%.*]] = bitcast i64 [[TMP24]] to <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP26]], i32 [[TMP18]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP27]], i32 [[TMP19]], 2 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP28]], i32 [[TMP20]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP29]], i32 [[TMP21]], 4 +; CHECK-NEXT: [[TMP31:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP30]], i32 [[TMP22]], 5 +; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, 
i32, i32 } [[TMP31]], i32 [[TMP23]], 6 +; CHECK-NEXT: [[TMP33:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP32]], 1 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP34]], 3 +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP34]], 2 +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[TMP34]], 1 +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP48]], i1 true) +; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP33]], i32 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP33]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP51]]) +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP50]], -64 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP53]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <2 x i32> [[TMP54]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = bitcast i64 [[TMP57]] to <2 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <2 x i32> [[TMP58]], i64 0 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <2 x 
i32> [[TMP58]], i64 1 +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[TMP59]], i64 1 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[TMP60]], i64 2 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[TMP61]], i64 16 +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[TMP62]], i64 17 +; CHECK-NEXT: [[TMP82:%.*]] = insertelement <20 x i32> [[TMP81]], i32 [[TMP63]], i64 18 +; CHECK-NEXT: [[TMP83:%.*]] = insertelement <20 x i32> 
[[TMP82]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP56]], i32 inreg [[TMP52]], <20 x i32> inreg [[TMP83]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP32]], i32 0) ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -136,7 +170,7 @@ AllocaSpillBB: define void @test.1(i32 %cspInit, i32 %p2, i32 %q1) !lgc.cps !1 !lgc.shaderstage !2 !continuation !5 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test.1 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] !continuation [[META8:![0-9]+]] !continuation.state [[META7]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[P2:%.*]], i32 [[Q1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation 
[[META9:![0-9]+]] !continuation.state [[META8]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -144,8 +178,9 @@ define void @test.1(i32 %cspInit, i32 %p2, i32 %q1) !lgc.cps !1 !lgc.shaderstage ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i32 [[Q1]] to ptr addrspace(5) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP7]], i32 0 @@ -154,68 +189,71 @@ define void @test.1(i32 %cspInit, i32 %p2, i32 %q1) !lgc.cps !1 !lgc.shaderstage ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP9]], i32 0 ; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(5) [[TMP10]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP12]], i32 add (i32 ptrtoint (ptr @test.2 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP13]], i32 [[TMP11]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP14]], i32 poison, 3 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP15]], i32 poison, 4 -; 
CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP16]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP17]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP20]]) -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP19]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP19]], 2 -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP27]], i32 [[TMP25]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP19]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP33]], i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP18]], i32 [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP18]], [[TMP35]] -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP35]]) -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP37]]) -; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP38]], -64 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP40]], i64 0 -; CHECK-NEXT: [[TMP42:%.*]] = bitcast <2 x i32> [[TMP41]] to i64 -; CHECK-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr -; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint ptr addrspace(4) 
[[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP45:%.*]] = bitcast i64 [[TMP44]] to <2 x i32> -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i32> [[TMP45]], i64 0 -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP45]], i64 1 -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP51:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> [[TMP51]], i32 [[TMP46]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[TMP47]], i64 2 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[TMP48]], i64 16 -; CHECK-NEXT: 
[[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[TMP49]], i64 17 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[TMP50]], i64 18 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32s(ptr inreg [[TMP43]], i32 inreg [[TMP39]], <20 x i32> inreg [[TMP70]], { <3 x i32>, i32, i32, i32, i32 } [[TMP16]], i32 0) +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ add (i32 ptrtoint (ptr @test.2 to i32), i32 1), [[TMP6]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ [[TMP11]], [[TMP6]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[P2]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[Q1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64 [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP18]], i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP19]], i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP20]], i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP21]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP24]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = call i32 
@llvm.amdgcn.ballot.i32(i1 [[TMP27]]) +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP26]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP24]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP24]], 1 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP38]], i1 true) +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP23]], i32 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP23]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], -64 +; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = bitcast <2 x i32> [[TMP44]] to i64 +; CHECK-NEXT: [[TMP46:%.*]] = inttoptr i64 [[TMP45]] to ptr +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast i64 [[TMP47]] to <2 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[TMP48]], i64 0 +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[TMP48]], i64 1 +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 
[[TMP49]], i64 1 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP50]], i64 2 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP51]], i64 16 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP52]], i64 17 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP53]], i64 18 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32s(ptr inreg [[TMP46]], i32 inreg [[TMP42]], <20 x i32> inreg [[TMP73]], { <3 x i32>, i32, i32, i32, i32 } [[TMP22]], i32 0) ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -235,7 +273,7 @@ AllocaSpillBB: define void @test.2(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !6 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test.2 -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] !continuation [[META9:![0-9]+]] !continuation.state [[META7]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META10:![0-9]+]] !continuation.state [[META8]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -243,8 
+281,9 @@ define void @test.2(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -12 @@ -254,70 +293,74 @@ define void @test.2(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -12 ; CHECK-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <3 x i32>, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <3 x i32>, i32, i32, i32 } [[TMP13]], i32 0, 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <3 x i32>, i32, i32, i32 } [[TMP14]], i32 poison, 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, i32, i32 } [[TMP15]], i32 poison, 3 -; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <3 x i32>, i32, i32, i32 } [[TMP16]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP17]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 -; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP20]]) -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP19]], 3 -; CHECK-NEXT: 
[[TMP23:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP22]]) -; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP19]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ 0, [[TMP6]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i64 [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP19]], i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP20]], i32 [[TMP14]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP21]], i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP22]], i32 [[TMP16]], 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32 } [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP27]], i32 [[TMP25]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP19]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 
[[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP33]], i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP18]], i32 [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP18]], [[TMP35]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP25]], 3 +; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP29]], i32 [[TMP27]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP25]], 2 +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP32]]) +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP33]], i32 [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP25]], 1 ; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP35]]) -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP37]]) -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP38]], 0 -; CHECK-NEXT: br i1 [[TMP40]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP37]], i32 [[TMP35]] +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP39]], i1 true) +; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP24]], i32 [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP24]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP42]]) +; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP41]], 0 +; CHECK-NEXT: br i1 [[TMP44]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] ; CHECK: chain.block: -; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP38]], -64 -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP41]], i64 0 -; CHECK-NEXT: [[TMP43:%.*]] 
= bitcast <2 x i32> [[TMP42]] to i64 -; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr -; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP46:%.*]] = bitcast i64 [[TMP45]] to <2 x i32> -; CHECK-NEXT: [[TMP47:%.*]] = extractelement <2 x i32> [[TMP46]], i64 0 -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP46]], i64 1 -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP51:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> [[TMP52]], i32 [[TMP47]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP48]], i64 2 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 
x i32> [[TMP66]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[TMP49]], i64 16 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[TMP50]], i64 17 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP51]], i64 18 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32s(ptr inreg [[TMP44]], i32 inreg [[TMP39]], <20 x i32> inreg [[TMP71]], { <3 x i32>, i32, i32, i32 } [[TMP16]], i32 0) +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP41]], -64 +; CHECK-NEXT: [[TMP46:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP45]], i64 0 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <2 x i32> [[TMP46]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr +; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = bitcast i64 [[TMP49]] to <2 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[TMP50]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> [[TMP50]], i64 1 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP51]], i64 1 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP52]], i64 2 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD2]], 
i64 5 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP53]], i64 16 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP54]], i64 17 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP55]], i64 18 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32s(ptr inreg [[TMP48]], i32 inreg [[TMP43]], <20 x i32> inreg [[TMP75]], { <3 x i32>, i32, i32, i32, i32 } [[TMP23]], i32 0) ; CHECK-NEXT: unreachable ; CHECK: ret.block: ; CHECK-NEXT: ret void @@ -338,7 +381,7 @@ AllocaSpillBB: define void @test.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !7 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test.gep -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] !continuation [[META10:![0-9]+]] !continuation.state [[META7]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META11:![0-9]+]] !continuation.state [[META8]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 
@llvm.amdgcn.s.getpc() @@ -346,8 +389,9 @@ define void @test.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuatio ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 @@ -378,70 +422,75 @@ define void @test.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuatio ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP25]], i32 0 ; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(5) [[TMP26]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP28]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP29]], i32 [[TMP27]], 2 -; CHECK-NEXT: [[TMP31:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP30]], i32 poison, 3 -; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP31]], i32 poison, 4 -; CHECK-NEXT: [[TMP33:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP32]], i32 [[TMP24]], 5 -; CHECK-NEXT: [[TMP34:%.*]] = insertvalue { 
<3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP33]], i32 [[TMP24]], 6 -; CHECK-NEXT: [[TMP35:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP34]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP35]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 7 -; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP38]]) -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP37]], 3 -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 -; CHECK-NEXT: [[TMP43:%.*]] = select i1 [[TMP42]], i32 [[TMP41]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP37]], 2 -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP44]]) -; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[TMP45]], i32 [[TMP43]] -; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i32 [[TMP37]], 1 -; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP48]]) -; CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP49]], i32 [[TMP47]] -; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP51]], i1 true) -; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP36]], i32 [[TMP52]]) -; CHECK-NEXT: [[TMP54:%.*]] = icmp eq i32 [[TMP36]], [[TMP53]] -; CHECK-NEXT: [[TMP55:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP54]]) -; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP53]]) -; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP55]]) -; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP56]], -64 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP58]], i64 0 -; CHECK-NEXT: [[TMP60:%.*]] = bitcast <2 x i32> [[TMP59]] to i64 -; CHECK-NEXT: [[TMP61:%.*]] = inttoptr i64 
[[TMP60]] to ptr -; CHECK-NEXT: [[TMP62:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP63:%.*]] = bitcast i64 [[TMP62]] to <2 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <2 x i32> [[TMP63]], i64 0 -; CHECK-NEXT: [[TMP65:%.*]] = extractelement <2 x i32> [[TMP63]], i64 1 -; CHECK-NEXT: [[TMP66:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP67:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP64]], i64 1 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP65]], i64 2 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP82:%.*]] = insertelement <20 x i32> [[TMP81]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP83:%.*]] = insertelement <20 x i32> [[TMP82]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP84:%.*]] = insertelement <20 x i32> [[TMP83]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP85:%.*]] = 
insertelement <20 x i32> [[TMP84]], i32 [[TMP66]], i64 16 -; CHECK-NEXT: [[TMP86:%.*]] = insertelement <20 x i32> [[TMP85]], i32 [[TMP67]], i64 17 -; CHECK-NEXT: [[TMP87:%.*]] = insertelement <20 x i32> [[TMP86]], i32 [[TMP68]], i64 18 -; CHECK-NEXT: [[TMP88:%.*]] = insertelement <20 x i32> [[TMP87]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP61]], i32 inreg [[TMP57]], <20 x i32> inreg [[TMP88]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP34]], i32 0) +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ add (i32 ptrtoint (ptr @test.1 to i32), i32 1), [[TMP6]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ [[TMP27]], [[TMP6]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP24]], [[TMP6]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP24]], [[TMP6]] ] +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP35:%.*]] = bitcast i64 [[TMP34]] to <2 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP37:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP36]], i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP37]], i32 [[TMP29]], 2 +; CHECK-NEXT: [[TMP39:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP38]], i32 [[TMP30]], 3 +; CHECK-NEXT: [[TMP40:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP39]], i32 [[TMP31]], 4 +; CHECK-NEXT: 
[[TMP41:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP40]], i32 [[TMP32]], 5 +; CHECK-NEXT: [[TMP42:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP41]], i32 [[TMP33]], 6 +; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP42]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 7 +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i32 [[TMP44]], 3 +; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP47]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[TMP44]], 2 +; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP51]]) +; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 [[TMP50]] +; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i32 [[TMP44]], 1 +; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP55]]) +; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 [[TMP54]] +; CHECK-NEXT: [[TMP59:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP58]], i1 true) +; CHECK-NEXT: [[TMP60:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP43]], i32 [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP43]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP61]]) +; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP60]], -64 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <2 x i32> [[TMP35]], i32 [[TMP63]], i64 0 +; CHECK-NEXT: [[TMP65:%.*]] = bitcast <2 x i32> [[TMP64]] to i64 +; CHECK-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr +; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: 
[[TMP68:%.*]] = bitcast i64 [[TMP67]] to <2 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <2 x i32> [[TMP68]], i64 0 +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <2 x i32> [[TMP68]], i64 1 +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP69]], i64 1 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[TMP70]], i64 2 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP82:%.*]] = insertelement <20 x i32> [[TMP81]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP83:%.*]] = insertelement <20 x i32> [[TMP82]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <20 x i32> [[TMP83]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP85:%.*]] = insertelement <20 x i32> [[TMP84]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP86:%.*]] = insertelement <20 x i32> [[TMP85]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP87:%.*]] = insertelement <20 x i32> [[TMP86]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP88:%.*]] = insertelement <20 x i32> [[TMP87]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP89:%.*]] = insertelement <20 x i32> [[TMP88]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP90:%.*]] = insertelement <20 x i32> [[TMP89]], i32 [[TMP71]], i64 16 +; CHECK-NEXT: [[TMP91:%.*]] = insertelement <20 x i32> 
[[TMP90]], i32 [[TMP72]], i64 17 +; CHECK-NEXT: [[TMP92:%.*]] = insertelement <20 x i32> [[TMP91]], i32 [[TMP73]], i64 18 +; CHECK-NEXT: [[TMP93:%.*]] = insertelement <20 x i32> [[TMP92]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP66]], i32 inreg [[TMP62]], <20 x i32> inreg [[TMP93]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP42]], i32 0) ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -483,7 +532,7 @@ AllocaSpillBB: define void @test.nested.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !continuation !8 !continuation.state !4 { ; CHECK-LABEL: define {{[^@]+}}@test.nested.gep -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[CSPINIT:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] !continuation [[META11:![0-9]+]] !continuation.state [[META7]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 
[[VCR:%.*]], i32 [[CSPINIT:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META5]] !lgc.shaderstage [[META6]] !continuation [[META12:![0-9]+]] !continuation.state [[META8]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() @@ -491,8 +540,9 @@ define void @test.nested.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !cont ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 6: ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 12 @@ -506,70 +556,75 @@ define void @test.nested.gep(i32 %cspInit) !lgc.cps !1 !lgc.shaderstage !2 !cont ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(5) [[TMP13]], i32 0 ; CHECK-NEXT: store i32 [[TMP12]], ptr addrspace(5) [[TMP14]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP16]], i32 add (i32 ptrtoint (ptr @test.1 to i32), i32 1), 1 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP17]], i32 [[TMP15]], 2 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 
x i32>, i32, i32, i32, i32, i32, i32 } [[TMP18]], i32 poison, 3 -; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP19]], i32 poison, 4 -; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP20]], i32 [[TMP12]], 5 -; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP21]], i32 [[TMP12]], 6 -; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP22]], 1 -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP23]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7 -; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP25]], 3 -; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP28]]) -; CHECK-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -; CHECK-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP29]], i32 [[TMP27]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP25]], 2 -; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP32]]) -; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP33]], i32 [[TMP31]] -; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP25]], 1 -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) -; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP37]], i32 [[TMP35]] -; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP39]], i1 true) -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP24]], i32 [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP24]], [[TMP41]] -; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP42]]) -; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 
[[TMP41]]) -; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP43]]) -; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP44]], -64 -; CHECK-NEXT: [[TMP47:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP46]], i64 0 -; CHECK-NEXT: [[TMP48:%.*]] = bitcast <2 x i32> [[TMP47]] to i64 -; CHECK-NEXT: [[TMP49:%.*]] = inttoptr i64 [[TMP48]] to ptr -; CHECK-NEXT: [[TMP50:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP51:%.*]] = bitcast i64 [[TMP50]] to <2 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <2 x i32> [[TMP51]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <2 x i32> [[TMP51]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[TMP52]], i64 1 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[TMP53]], i64 2 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> 
[[TMP68]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP54]], i64 16 -; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[TMP55]], i64 17 -; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 [[TMP56]], i64 18 -; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP49]], i32 inreg [[TMP45]], <20 x i32> inreg [[TMP76]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP22]], i32 0) +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[VCR]], [[ALLOCASPILLBB:%.*]] ], [ add (i32 ptrtoint (ptr @test.1 to i32), i32 1), [[TMP6]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[CSPINIT]], [[ALLOCASPILLBB]] ], [ [[TMP15]], [[TMP6]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ALLOCASPILLBB]] ], [ poison, [[TMP6]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP12]], [[TMP6]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ poison, [[ALLOCASPILLBB]] ], [ [[TMP12]], [[TMP6]] ] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i64 [[TMP22]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP24]], i32 [[TMP16]], 1 +; CHECK-NEXT: 
[[TMP26:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP25]], i32 [[TMP17]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP26]], i32 [[TMP18]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP27]], i32 [[TMP19]], 4 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP28]], i32 [[TMP20]], 5 +; CHECK-NEXT: [[TMP30:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP29]], i32 [[TMP21]], 6 +; CHECK-NEXT: [[TMP31:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP30]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP33]]) +; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP32]], 3 +; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP35]]) +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP34]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP32]], 2 +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[TMP32]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP46]], i1 true) +; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP31]], i32 [[TMP47]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i32 [[TMP31]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP49]]) +; CHECK-NEXT: 
[[TMP51:%.*]] = and i32 [[TMP48]], -64 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP51]], i64 0 +; CHECK-NEXT: [[TMP53:%.*]] = bitcast <2 x i32> [[TMP52]] to i64 +; CHECK-NEXT: [[TMP54:%.*]] = inttoptr i64 [[TMP53]] to ptr +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i64 [[TMP55]] to <2 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <2 x i32> [[TMP56]], i64 0 +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <2 x i32> [[TMP56]], i64 1 +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[TMP57]], i64 1 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[TMP58]], i64 2 +; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP75:%.*]] = insertelement <20 x i32> [[TMP74]], i32 
[[PAD10]], i64 13 +; CHECK-NEXT: [[TMP76:%.*]] = insertelement <20 x i32> [[TMP75]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP77:%.*]] = insertelement <20 x i32> [[TMP76]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP78:%.*]] = insertelement <20 x i32> [[TMP77]], i32 [[TMP59]], i64 16 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <20 x i32> [[TMP78]], i32 [[TMP60]], i64 17 +; CHECK-NEXT: [[TMP80:%.*]] = insertelement <20 x i32> [[TMP79]], i32 [[TMP61]], i64 18 +; CHECK-NEXT: [[TMP81:%.*]] = insertelement <20 x i32> [[TMP80]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP54]], i32 inreg [[TMP50]], <20 x i32> inreg [[TMP81]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP30]], i32 0) ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -614,6 +669,7 @@ attributes #0 = { noreturn } attributes #1 = { nounwind } !continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!9} !0 = !{i32 5} !1 = !{i32 1} @@ -624,3 +680,4 @@ attributes #1 = { nounwind } !6 = !{ptr @test.2} !7 = !{ptr @test.gep} !8 = !{ptr @test.nested.gep} +!9 = !{i32 3} diff --git a/lgc/test/Transforms/CpsLowering/cps-unify-exits-no-iww.lgc b/lgc/test/Transforms/CpsLowering/cps-unify-exits-no-iww.lgc new file mode 100644 index 0000000000..3557841cd8 --- /dev/null +++ b/lgc/test/Transforms/CpsLowering/cps-unify-exits-no-iww.lgc @@ -0,0 +1,383 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature +; RUN: lgc -mcpu=gfx1030 -o - -passes="require<lgc-pipeline-state>,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: !llvm-main-revision-ge-511860 + +declare void @lgc.cps.jump(...)
noreturn + +define void @unify_jumps({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { +; CHECK-LABEL: define {{[^@]+}}@unify_jumps +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], { i32 } [[STATE:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 [[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 [[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META0:![0-9]+]] !lgc.shaderstage [[META4:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 
[[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32 } [[STATE]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TABLE]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[TMP6]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 +; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 +; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: else: +; CHECK-NEXT: [[TABLE_1:%.*]] = getelementptr i32, ptr [[TABLE]], i32 1 +; CHECK-NEXT: [[CR_ELSE:%.*]] = load i32, ptr [[TABLE_1]], align 4 +; CHECK-NEXT: [[ELSE_ARG:%.*]] = uitofp i32 [[ARG]] to float +; CHECK-NEXT: [[TMP12:%.*]] = bitcast float [[ELSE_ARG]] to i32 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_ELSE]], [[ELSE]] ], [ [[CR_THEN]], [[THEN]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP5]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP9]], [[ENTRY]] ], [ [[TMP12]], [[ELSE]] ], [ [[THEN_ARG]], [[THEN]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY]] ], [ 5, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY]] ], [ poison, [[ELSE]] 
], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR3]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP36:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY]] ], [ poison, [[ELSE]] 
], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ [[INACTIVE_VGPR20]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP45:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP47:%.*]] = bitcast i64 [[TMP46]] to <2 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP48]], i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP50:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP49]], i32 [[TMP14]], 2 +; CHECK-NEXT: 
[[TMP51:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP50]], i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP52:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP51]], i32 [[TMP16]], 4 +; CHECK-NEXT: [[TMP53:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP52]], i32 [[TMP17]], 5 +; CHECK-NEXT: [[TMP54:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP53]], i32 [[TMP18]], 6 +; CHECK-NEXT: [[TMP55:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP54]], i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP56:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP55]], i32 [[TMP20]], 8 +; CHECK-NEXT: [[TMP57:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP56]], i32 [[TMP21]], 9 +; CHECK-NEXT: [[TMP58:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP57]], i32 [[TMP22]], 10 +; 
CHECK-NEXT: [[TMP59:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP58]], i32 [[TMP23]], 11 +; CHECK-NEXT: [[TMP60:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP59]], i32 [[TMP24]], 12 +; CHECK-NEXT: [[TMP61:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP60]], i32 [[TMP25]], 13 +; CHECK-NEXT: [[TMP62:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP61]], i32 [[TMP26]], 14 +; CHECK-NEXT: [[TMP63:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP62]], i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP63]], i32 [[TMP28]], 16 +; CHECK-NEXT: [[TMP65:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP64]], i32 [[TMP29]], 17 +; CHECK-NEXT: [[TMP66:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP65]], i32 
[[TMP30]], 18 +; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP66]], i32 [[TMP31]], 19 +; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP67]], i32 [[TMP32]], 20 +; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP68]], i32 [[TMP33]], 21 +; CHECK-NEXT: [[TMP70:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP69]], i32 [[TMP34]], 22 +; CHECK-NEXT: [[TMP71:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP70]], i32 [[TMP35]], 23 +; CHECK-NEXT: [[TMP72:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP71]], i32 [[TMP36]], 24 +; CHECK-NEXT: [[TMP73:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP72]], i32 [[TMP37]], 25 +; CHECK-NEXT: [[TMP74:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } 
[[TMP73]], i32 [[TMP38]], 26 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP74]], i32 [[TMP39]], 27 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP40]], 28 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP41]], 29 +; CHECK-NEXT: [[TMP78:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP77]], i32 [[TMP42]], 30 +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP78]], i32 [[TMP43]], 31 +; CHECK-NEXT: [[TMP80:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 [[TMP44]], 32 +; CHECK-NEXT: [[TMP81:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], i32 [[TMP45]], 33 +; CHECK-NEXT: [[TMP82:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32 } [[TMP81]], 1 +; CHECK-NEXT: [[TMP83:%.*]] = and i32 [[TMP82]], 7 +; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP84]]) +; CHECK-NEXT: [[TMP86:%.*]] = icmp eq i32 [[TMP83]], 3 +; CHECK-NEXT: [[TMP87:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP86]]) +; CHECK-NEXT: [[TMP88:%.*]] = icmp ne i32 [[TMP87]], 0 +; CHECK-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], i32 [[TMP87]], i32 [[TMP85]] +; CHECK-NEXT: [[TMP90:%.*]] = icmp eq i32 [[TMP83]], 4 +; CHECK-NEXT: [[TMP91:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP90]]) +; CHECK-NEXT: [[TMP92:%.*]] = icmp ne i32 [[TMP91]], 0 +; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP91]], i32 [[TMP89]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp eq i32 [[TMP83]], 5 +; CHECK-NEXT: [[TMP95:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP94]]) +; CHECK-NEXT: [[TMP96:%.*]] = icmp ne i32 [[TMP95]], 0 +; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP96]], i32 [[TMP95]], i32 [[TMP93]] +; CHECK-NEXT: [[TMP98:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP97]], i1 true) +; CHECK-NEXT: [[TMP99:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP82]], i32 [[TMP98]]) +; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP82]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP100]]) +; CHECK-NEXT: [[TMP102:%.*]] = and i32 [[TMP99]], -64 +; CHECK-NEXT: [[TMP103:%.*]] = insertelement <2 x i32> [[TMP47]], i32 [[TMP102]], i64 0 +; CHECK-NEXT: [[TMP104:%.*]] = bitcast <2 x i32> [[TMP103]] to i64 +; CHECK-NEXT: [[TMP105:%.*]] = inttoptr i64 [[TMP104]] to ptr +; CHECK-NEXT: [[TMP106:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast i64 [[TMP106]] to <2 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i32> [[TMP107]], i64 0 +; CHECK-NEXT: [[TMP109:%.*]] = extractelement <2 x i32> [[TMP107]], i64 1 +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 
0 +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP112:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP113:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <20 x i32> [[TMP113]], i32 [[TMP108]], i64 1 +; CHECK-NEXT: [[TMP115:%.*]] = insertelement <20 x i32> [[TMP114]], i32 [[TMP109]], i64 2 +; CHECK-NEXT: [[TMP116:%.*]] = insertelement <20 x i32> [[TMP115]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP117:%.*]] = insertelement <20 x i32> [[TMP116]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP118:%.*]] = insertelement <20 x i32> [[TMP117]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <20 x i32> [[TMP118]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <20 x i32> [[TMP119]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP121:%.*]] = insertelement <20 x i32> [[TMP120]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP122:%.*]] = insertelement <20 x i32> [[TMP121]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP123:%.*]] = insertelement <20 x i32> [[TMP122]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <20 x i32> [[TMP123]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP125:%.*]] = insertelement <20 x i32> [[TMP124]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP126:%.*]] = insertelement <20 x i32> [[TMP125]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP127:%.*]] = insertelement <20 x i32> [[TMP126]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP128:%.*]] = insertelement <20 x i32> [[TMP127]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <20 x i32> [[TMP128]], i32 [[TMP110]], i64 16 +; CHECK-NEXT: [[TMP130:%.*]] = insertelement <20 x i32> [[TMP129]], i32 [[TMP111]], i64 17 +; CHECK-NEXT: [[TMP131:%.*]] = insertelement <20 x i32> [[TMP130]], i32 [[TMP112]], i64 18 +; CHECK-NEXT: [[TMP132:%.*]] = insertelement <20 x i32> [[TMP131]], i32 [[MULTIDISPATCHINFO]], i64 19 +; 
CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP105]], i32 inreg [[TMP101]], <20 x i32> inreg [[TMP132]], { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP81]], i32 0) +; CHECK-NEXT: unreachable +; +entry: + %cond = icmp ult i32 %arg, 3 + br i1 %cond, label %then, label %else + +then: + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, i32 poison, i32 poison, i32 poison, i32 %then.arg) + unreachable + +else: + %table.1 = getelementptr i32, ptr %table, i32 1 + %cr.else = load i32, ptr %table.1, align 4 + %else.arg = uitofp i32 %arg to float + call void (...) 
@lgc.cps.jump(i32 %cr.else, i32 2, i32 poison, i32 poison, i32 poison, float %else.arg, i32 5) + unreachable +} + +define void @unify_jump_ret({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shaderstage !{i32 7} { +; CHECK-LABEL: define {{[^@]+}}@unify_jump_ret +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], { i32 } [[STATE:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 [[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 [[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META0]] !lgc.shaderstage [[META4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32 } [[STATE]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TABLE]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[TMP6]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 11: +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 +; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 +; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: else: +; CHECK-NEXT: br label [[TAIL_BLOCK]] +; CHECK: tail.block: +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_THEN]], [[THEN]] ], [ 0, [[ELSE]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP5]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[TMP9]], [[ENTRY]] ], [ [[THEN_ARG]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ 
[[INACTIVE_VGPR3]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP36:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ 
[[INACTIVE_VGPR20]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP45:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP46:%.*]] = bitcast i64 [[TMP45]] to <2 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP47]], i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP49:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP48]], i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP50:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32 } [[TMP49]], i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP51:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP50]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP52:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP51]], i32 [[TMP16]], 5 +; CHECK-NEXT: [[TMP53:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP52]], i32 [[TMP17]], 6 +; CHECK-NEXT: [[TMP54:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP53]], i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP55:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP54]], i32 [[TMP19]], 8 +; CHECK-NEXT: [[TMP56:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP55]], i32 [[TMP20]], 9 +; CHECK-NEXT: [[TMP57:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP56]], i32 [[TMP21]], 10 +; CHECK-NEXT: [[TMP58:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32 } [[TMP57]], i32 [[TMP22]], 11 +; CHECK-NEXT: [[TMP59:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP58]], i32 [[TMP23]], 12 +; CHECK-NEXT: [[TMP60:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP59]], i32 [[TMP24]], 13 +; CHECK-NEXT: [[TMP61:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP60]], i32 [[TMP25]], 14 +; CHECK-NEXT: [[TMP62:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP61]], i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP62]], i32 [[TMP27]], 16 +; CHECK-NEXT: [[TMP64:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP63]], i32 [[TMP28]], 17 +; CHECK-NEXT: [[TMP65:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP64]], i32 [[TMP29]], 18 +; CHECK-NEXT: [[TMP66:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32 } [[TMP65]], i32 [[TMP30]], 19 +; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP66]], i32 [[TMP31]], 20 +; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP67]], i32 [[TMP32]], 21 +; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP68]], i32 [[TMP33]], 22 +; CHECK-NEXT: [[TMP70:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP69]], i32 [[TMP34]], 23 +; CHECK-NEXT: [[TMP71:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP70]], i32 [[TMP35]], 24 +; CHECK-NEXT: [[TMP72:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP71]], i32 [[TMP36]], 25 +; CHECK-NEXT: [[TMP73:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP72]], i32 [[TMP37]], 26 +; CHECK-NEXT: [[TMP74:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP73]], i32 [[TMP38]], 27 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP74]], i32 [[TMP39]], 28 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP40]], 29 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP41]], 30 +; CHECK-NEXT: [[TMP78:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP77]], i32 [[TMP42]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP78]], i32 [[TMP43]], 32 +; CHECK-NEXT: [[TMP80:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 [[TMP44]], 33 +; CHECK-NEXT: [[TMP81:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], 1 +; CHECK-NEXT: [[TMP82:%.*]] = and i32 [[TMP81]], 7 +; CHECK-NEXT: [[TMP83:%.*]] = icmp ne i32 [[TMP82]], 0 +; CHECK-NEXT: [[TMP84:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 
[[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = icmp eq i32 [[TMP82]], 3 +; CHECK-NEXT: [[TMP86:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP85]]) +; CHECK-NEXT: [[TMP87:%.*]] = icmp ne i32 [[TMP86]], 0 +; CHECK-NEXT: [[TMP88:%.*]] = select i1 [[TMP87]], i32 [[TMP86]], i32 [[TMP84]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp eq i32 [[TMP82]], 4 +; CHECK-NEXT: [[TMP90:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP89]]) +; CHECK-NEXT: [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0 +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[TMP90]], i32 [[TMP88]] +; CHECK-NEXT: [[TMP93:%.*]] = icmp eq i32 [[TMP82]], 5 +; CHECK-NEXT: [[TMP94:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP93]]) +; CHECK-NEXT: [[TMP95:%.*]] = icmp ne i32 [[TMP94]], 0 +; CHECK-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[TMP94]], i32 [[TMP92]] +; CHECK-NEXT: [[TMP97:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP96]], i1 true) +; CHECK-NEXT: [[TMP98:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP81]], i32 [[TMP97]]) +; CHECK-NEXT: [[TMP99:%.*]] = icmp eq i32 [[TMP81]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP99]]) +; CHECK-NEXT: [[TMP101:%.*]] = icmp eq i32 [[TMP98]], 0 +; CHECK-NEXT: br i1 [[TMP101]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK: chain.block: +; CHECK-NEXT: [[TMP102:%.*]] = and i32 [[TMP98]], -64 +; CHECK-NEXT: [[TMP103:%.*]] = insertelement <2 x i32> [[TMP46]], i32 [[TMP102]], i64 0 +; CHECK-NEXT: [[TMP104:%.*]] = bitcast <2 x i32> [[TMP103]] to i64 +; CHECK-NEXT: [[TMP105:%.*]] = inttoptr i64 [[TMP104]] to ptr +; CHECK-NEXT: [[TMP106:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast i64 [[TMP106]] to <2 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i32> [[TMP107]], i64 0 +; CHECK-NEXT: [[TMP109:%.*]] = extractelement <2 x i32> [[TMP107]], i64 1 +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP111:%.*]] = 
extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP112:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP113:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <20 x i32> [[TMP113]], i32 [[TMP108]], i64 1 +; CHECK-NEXT: [[TMP115:%.*]] = insertelement <20 x i32> [[TMP114]], i32 [[TMP109]], i64 2 +; CHECK-NEXT: [[TMP116:%.*]] = insertelement <20 x i32> [[TMP115]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP117:%.*]] = insertelement <20 x i32> [[TMP116]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP118:%.*]] = insertelement <20 x i32> [[TMP117]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <20 x i32> [[TMP118]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <20 x i32> [[TMP119]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP121:%.*]] = insertelement <20 x i32> [[TMP120]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP122:%.*]] = insertelement <20 x i32> [[TMP121]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP123:%.*]] = insertelement <20 x i32> [[TMP122]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <20 x i32> [[TMP123]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP125:%.*]] = insertelement <20 x i32> [[TMP124]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP126:%.*]] = insertelement <20 x i32> [[TMP125]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP127:%.*]] = insertelement <20 x i32> [[TMP126]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP128:%.*]] = insertelement <20 x i32> [[TMP127]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <20 x i32> [[TMP128]], i32 [[TMP110]], i64 16 +; CHECK-NEXT: [[TMP130:%.*]] = insertelement <20 x i32> [[TMP129]], i32 [[TMP111]], i64 17 +; CHECK-NEXT: [[TMP131:%.*]] = insertelement <20 x i32> [[TMP130]], i32 [[TMP112]], i64 18 +; CHECK-NEXT: [[TMP132:%.*]] = insertelement <20 x i32> [[TMP131]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, 
{ <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP105]], i32 inreg [[TMP100]], <20 x i32> inreg [[TMP132]], { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], i32 0) +; CHECK-NEXT: unreachable +; CHECK: ret.block: +; CHECK-NEXT: ret void +; +entry: + %cond = icmp ult i32 %arg, 3 + br i1 %cond, label %then, label %else + +then: + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + call void (...) @lgc.cps.jump(i32 %cr.then, i32 2, i32 poison, i32 poison, i32 poison, i32 %then.arg) + unreachable + +else: + ret void +} + +attributes #0 = { noreturn } + +!continuation.stackAddrspace = !{!0} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} diff --git a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc index 443a69958f..15639b6114 100644 --- a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc @@ -1,101 +1,187 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --function-signature -; RUN: lgc -mcpu=gfx1030 -o - -passes="require<lgc-pipeline-state>,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s +; RUN: lgc -mcpu=gfx1030 -o - -lgc-use-init-whole-wave -passes="require<lgc-pipeline-state>,lgc-mutate-entry-point" %s | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 declare void @lgc.cps.jump(...) 
#0 define void @unify_jumps(i32 %arg, ptr %table) !lgc.cps !1 !lgc.shaderstage !2 { ; CHECK-LABEL: define {{[^@]+}}@unify_jumps -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META4:![0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 [[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 
[[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]], i32 [[INACTIVE_VGPR28:%.*]]) #[[ATTR1:[0-9]+]] align 64 !lgc.cps [[META4:![0-9]+]] !lgc.shaderstage [[META5:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TABLE]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 10: ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 ; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 ; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 ; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: else: ; CHECK-NEXT: [[TABLE_1:%.*]] = getelementptr i32, ptr [[TABLE]], i32 1 ; CHECK-NEXT: [[CR_ELSE:%.*]] = load i32, ptr [[TABLE_1]], align 4 ; CHECK-NEXT: [[ELSE_ARG:%.*]] = uitofp i32 [[ARG]] to float -; CHECK-NEXT: [[TMP7:%.*]] = bitcast float [[ELSE_ARG]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float [[ELSE_ARG]] to i32 ; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; 
CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[CR_ELSE]], [[ELSE]] ], [ [[CR_THEN]], [[THEN]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ poison, [[ELSE]] ], [ poison, [[THEN]] ] -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[ELSE]] ], [ poison, [[THEN]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[ELSE]] ], [ poison, [[THEN]] ] -; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP7]], [[ELSE]] ], [ [[THEN_ARG]], [[THEN]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ 5, [[ELSE]] ], [ poison, [[THEN]] ] -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP14]], i32 [[TMP8]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP15]], i32 [[TMP9]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP16]], i32 [[TMP10]], 3 -; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP17]], i32 [[TMP11]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP18]], i32 [[TMP12]], 5 -; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP19]], i32 [[TMP13]], 6 -; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP20]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP21]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7 -; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP24]]) -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP23]], 3 -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP26]]) -; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP27]], i32 [[TMP25]] -; CHECK-NEXT: 
[[TMP30:%.*]] = icmp eq i32 [[TMP23]], 2 -; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP30]]) -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 [[TMP29]] -; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP23]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP34]]) -; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -; CHECK-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 [[TMP35]], i32 [[TMP33]] -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP37]], i1 true) -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP22]], i32 [[TMP38]]) -; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[TMP22]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP39]]) -; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP41]]) -; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP42]], -64 -; CHECK-NEXT: [[TMP45:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP44]], i64 0 -; CHECK-NEXT: [[TMP46:%.*]] = bitcast <2 x i32> [[TMP45]] to i64 -; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr -; CHECK-NEXT: [[TMP48:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP49:%.*]] = bitcast i64 [[TMP48]] to <2 x i32> -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <2 x i32> [[TMP49]], i64 0 -; CHECK-NEXT: [[TMP51:%.*]] = extractelement <2 x i32> [[TMP49]], i64 1 -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[TMP50]], i64 1 -; CHECK-NEXT: 
[[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[TMP51]], i64 2 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP52]], i64 16 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[TMP53]], i64 17 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <20 x i32> [[TMP72]], i32 [[TMP54]], i64 18 -; CHECK-NEXT: [[TMP74:%.*]] = insertelement <20 x i32> [[TMP73]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32s(ptr inreg [[TMP47]], i32 inreg [[TMP43]], <20 x i32> inreg [[TMP74]], { <3 x i32>, i32, i32, i32, i32, i32, i32 } [[TMP20]], i32 0) +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_ELSE]], [[ELSE]] ], [ [[CR_THEN]], [[THEN]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP7]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY]] ], [ [[TMP11]], [[ELSE]] ], [ [[THEN_ARG]], [[THEN]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY]] ], [ 5, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR3]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY]] 
], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP36:%.*]] = phi i32 [ [[INACTIVE_VGPR20]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[INACTIVE_VGPR28]], 
[[ENTRY]] ], [ poison, [[ELSE]] ], [ poison, [[THEN]] ] +; CHECK-NEXT: [[TMP45:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP46:%.*]] = bitcast i64 [[TMP45]] to <2 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP48:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP47]], i32 [[TMP12]], 1 +; CHECK-NEXT: [[TMP49:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP48]], i32 [[TMP13]], 2 +; CHECK-NEXT: [[TMP50:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP49]], i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP51:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP50]], i32 [[TMP15]], 4 +; CHECK-NEXT: [[TMP52:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP51]], i32 [[TMP16]], 5 +; CHECK-NEXT: [[TMP53:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP52]], i32 [[TMP17]], 6 +; CHECK-NEXT: [[TMP54:%.*]] = insertvalue { <3 x 
i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP53]], i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP55:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP54]], i32 [[TMP19]], 8 +; CHECK-NEXT: [[TMP56:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP55]], i32 [[TMP20]], 9 +; CHECK-NEXT: [[TMP57:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP56]], i32 [[TMP21]], 10 +; CHECK-NEXT: [[TMP58:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP57]], i32 [[TMP22]], 11 +; CHECK-NEXT: [[TMP59:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP58]], i32 [[TMP23]], 12 +; CHECK-NEXT: [[TMP60:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP59]], i32 [[TMP24]], 13 +; CHECK-NEXT: [[TMP61:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP60]], i32 [[TMP25]], 14 +; CHECK-NEXT: [[TMP62:%.*]] = insertvalue { 
<3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP61]], i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP62]], i32 [[TMP27]], 16 +; CHECK-NEXT: [[TMP64:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP63]], i32 [[TMP28]], 17 +; CHECK-NEXT: [[TMP65:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP64]], i32 [[TMP29]], 18 +; CHECK-NEXT: [[TMP66:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP65]], i32 [[TMP30]], 19 +; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP66]], i32 [[TMP31]], 20 +; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP67]], i32 [[TMP32]], 21 +; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP68]], i32 [[TMP33]], 22 +; CHECK-NEXT: [[TMP70:%.*]] = 
insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP69]], i32 [[TMP34]], 23 +; CHECK-NEXT: [[TMP71:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP70]], i32 [[TMP35]], 24 +; CHECK-NEXT: [[TMP72:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP71]], i32 [[TMP36]], 25 +; CHECK-NEXT: [[TMP73:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP72]], i32 [[TMP37]], 26 +; CHECK-NEXT: [[TMP74:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP73]], i32 [[TMP38]], 27 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP74]], i32 [[TMP39]], 28 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP40]], 29 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP41]], 30 +; CHECK-NEXT: 
[[TMP78:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP77]], i32 [[TMP42]], 31 +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP78]], i32 [[TMP43]], 32 +; CHECK-NEXT: [[TMP80:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 [[TMP44]], 33 +; CHECK-NEXT: [[TMP81:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], 1 +; CHECK-NEXT: [[TMP82:%.*]] = and i32 [[TMP81]], 7 +; CHECK-NEXT: [[TMP83:%.*]] = icmp ne i32 [[TMP82]], 0 +; CHECK-NEXT: [[TMP84:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP83]]) +; CHECK-NEXT: [[TMP85:%.*]] = icmp eq i32 [[TMP82]], 3 +; CHECK-NEXT: [[TMP86:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP85]]) +; CHECK-NEXT: [[TMP87:%.*]] = icmp ne i32 [[TMP86]], 0 +; CHECK-NEXT: [[TMP88:%.*]] = select i1 [[TMP87]], i32 [[TMP86]], i32 [[TMP84]] +; CHECK-NEXT: [[TMP89:%.*]] = icmp eq i32 [[TMP82]], 2 +; CHECK-NEXT: [[TMP90:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP89]]) +; CHECK-NEXT: [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0 +; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[TMP90]], i32 [[TMP88]] +; CHECK-NEXT: [[TMP93:%.*]] = icmp eq i32 [[TMP82]], 1 +; CHECK-NEXT: [[TMP94:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP93]]) +; CHECK-NEXT: [[TMP95:%.*]] = icmp ne i32 [[TMP94]], 0 +; CHECK-NEXT: [[TMP96:%.*]] = select i1 [[TMP95]], i32 [[TMP94]], i32 [[TMP92]] +; CHECK-NEXT: 
[[TMP97:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP96]], i1 true) +; CHECK-NEXT: [[TMP98:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP81]], i32 [[TMP97]]) +; CHECK-NEXT: [[TMP99:%.*]] = icmp eq i32 [[TMP81]], [[TMP98]] +; CHECK-NEXT: [[TMP100:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP99]]) +; CHECK-NEXT: [[TMP101:%.*]] = and i32 [[TMP98]], -64 +; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i32> [[TMP46]], i32 [[TMP101]], i64 0 +; CHECK-NEXT: [[TMP103:%.*]] = bitcast <2 x i32> [[TMP102]] to i64 +; CHECK-NEXT: [[TMP104:%.*]] = inttoptr i64 [[TMP103]] to ptr +; CHECK-NEXT: [[TMP105:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i64 [[TMP105]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i32> [[TMP106]], i64 0 +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i32> [[TMP106]], i64 1 +; CHECK-NEXT: [[TMP109:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP112:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP113:%.*]] = insertelement <20 x i32> [[TMP112]], i32 [[TMP107]], i64 1 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <20 x i32> [[TMP113]], i32 [[TMP108]], i64 2 +; CHECK-NEXT: [[TMP115:%.*]] = insertelement <20 x i32> [[TMP114]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP116:%.*]] = insertelement <20 x i32> [[TMP115]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP117:%.*]] = insertelement <20 x i32> [[TMP116]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP118:%.*]] = insertelement <20 x i32> [[TMP117]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <20 x i32> [[TMP118]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <20 x i32> [[TMP119]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP121:%.*]] = insertelement <20 x i32> [[TMP120]], 
i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP122:%.*]] = insertelement <20 x i32> [[TMP121]], i32 [[PAD7]], i64 10 +; CHECK-NEXT: [[TMP123:%.*]] = insertelement <20 x i32> [[TMP122]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <20 x i32> [[TMP123]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP125:%.*]] = insertelement <20 x i32> [[TMP124]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP126:%.*]] = insertelement <20 x i32> [[TMP125]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP127:%.*]] = insertelement <20 x i32> [[TMP126]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP128:%.*]] = insertelement <20 x i32> [[TMP127]], i32 [[TMP109]], i64 16 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <20 x i32> [[TMP128]], i32 [[TMP110]], i64 17 +; CHECK-NEXT: [[TMP130:%.*]] = insertelement <20 x i32> [[TMP129]], i32 [[TMP111]], i64 18 +; CHECK-NEXT: [[TMP131:%.*]] = insertelement <20 x i32> [[TMP130]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP104]], i32 inreg [[TMP100]], <20 x i32> inreg [[TMP131]], { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP80]], i32 0) ; CHECK-NEXT: unreachable ; entry: @@ -119,94 +205,154 @@ else: ; preds = %entry define void @unify_jump_ret(i32 %arg, ptr %table) !lgc.cps !1 !lgc.shaderstage !2 { ; CHECK-LABEL: define {{[^@]+}}@unify_jump_ret -; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] { +; CHECK-SAME: (i32 inreg [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg [[NUMWORKGROUPSPTR:%.*]], i32 inreg [[PAD0:%.*]], i32 inreg [[PAD1:%.*]], i32 inreg [[PAD2:%.*]], i32 inreg [[PAD3:%.*]], i32 inreg [[PAD4:%.*]], i32 inreg [[PAD5:%.*]], i32 inreg [[PAD6:%.*]], i32 inreg [[PAD7:%.*]], i32 inreg [[PAD8:%.*]], i32 inreg [[PAD9:%.*]], i32 inreg [[PAD10:%.*]], i32 inreg [[PAD11:%.*]], i32 inreg [[SPILLTABLE:%.*]], <3 x i32> inreg [[WORKGROUPID:%.*]], i32 inreg [[MULTIDISPATCHINFO:%.*]], <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 [[VCR:%.*]], i32 [[ARG:%.*]], ptr [[TABLE:%.*]], i32 [[INACTIVE_VGPR:%.*]], i32 [[INACTIVE_VGPR1:%.*]], i32 [[INACTIVE_VGPR2:%.*]], i32 [[INACTIVE_VGPR3:%.*]], i32 [[INACTIVE_VGPR4:%.*]], i32 
[[INACTIVE_VGPR5:%.*]], i32 [[INACTIVE_VGPR6:%.*]], i32 [[INACTIVE_VGPR7:%.*]], i32 [[INACTIVE_VGPR8:%.*]], i32 [[INACTIVE_VGPR9:%.*]], i32 [[INACTIVE_VGPR10:%.*]], i32 [[INACTIVE_VGPR11:%.*]], i32 [[INACTIVE_VGPR12:%.*]], i32 [[INACTIVE_VGPR13:%.*]], i32 [[INACTIVE_VGPR14:%.*]], i32 [[INACTIVE_VGPR15:%.*]], i32 [[INACTIVE_VGPR16:%.*]], i32 [[INACTIVE_VGPR17:%.*]], i32 [[INACTIVE_VGPR18:%.*]], i32 [[INACTIVE_VGPR19:%.*]], i32 [[INACTIVE_VGPR20:%.*]], i32 [[INACTIVE_VGPR21:%.*]], i32 [[INACTIVE_VGPR22:%.*]], i32 [[INACTIVE_VGPR23:%.*]], i32 [[INACTIVE_VGPR24:%.*]], i32 [[INACTIVE_VGPR25:%.*]], i32 [[INACTIVE_VGPR26:%.*]], i32 [[INACTIVE_VGPR27:%.*]], i32 [[INACTIVE_VGPR28:%.*]]) #[[ATTR1]] align 64 !lgc.cps [[META4]] !lgc.shaderstage [[META5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TABLE]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.amdgcn.init.whole.wave() +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TAIL_BLOCK:%.*]] +; CHECK: 10: ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ARG]], 3 ; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[TABLE_0:%.*]] = getelementptr i32, ptr [[TABLE]], i32 0 ; CHECK-NEXT: [[CR_THEN:%.*]] = load i32, ptr [[TABLE_0]], align 4 ; CHECK-NEXT: [[THEN_ARG:%.*]] = add i32 [[ARG]], 1 -; CHECK-NEXT: br label [[TAIL_BLOCK:%.*]] +; CHECK-NEXT: br 
label [[TAIL_BLOCK]] ; CHECK: else: ; CHECK-NEXT: br label [[TAIL_BLOCK]] ; CHECK: tail.block: -; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[CR_THEN]], [[THEN]] ], [ 0, [[ELSE]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ poison, [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ poison, [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[THEN_ARG]], [[THEN]] ], [ poison, [[ELSE]] ] -; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 -; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP12]], i32 [[TMP7]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP13]], i32 [[TMP8]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP14]], i32 [[TMP9]], 3 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP15]], i32 [[TMP10]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP16]], i32 [[TMP11]], 5 -; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP17]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 [[TMP18]], i32 [[VCR]]) -; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7 -; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP23]]) -; CHECK-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP22]] -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP20]], 2 -; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP27]]) -; CHECK-NEXT: 
[[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP26]] -; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[TMP20]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP31]]) -; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP30]] -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP34]], i1 true) -; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP19]], i32 [[TMP35]]) -; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP19]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP37]]) -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP36]]) -; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP38]]) -; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP39]], 0 -; CHECK-NEXT: br i1 [[TMP41]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[VCR]], [[ENTRY:%.*]] ], [ [[CR_THEN]], [[THEN]] ], [ 0, [[ELSE]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[ARG]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP7]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP8]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[INACTIVE_VGPR]], [[ENTRY]] ], [ [[THEN_ARG]], [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi i32 [ [[INACTIVE_VGPR1]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[INACTIVE_VGPR2]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[INACTIVE_VGPR3]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[INACTIVE_VGPR4]], [[ENTRY]] ], [ poison, [[THEN]] ], 
[ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi i32 [ [[INACTIVE_VGPR5]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ [[INACTIVE_VGPR6]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[INACTIVE_VGPR7]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[INACTIVE_VGPR8]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[INACTIVE_VGPR9]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP25:%.*]] = phi i32 [ [[INACTIVE_VGPR10]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP26:%.*]] = phi i32 [ [[INACTIVE_VGPR11]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi i32 [ [[INACTIVE_VGPR12]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP28:%.*]] = phi i32 [ [[INACTIVE_VGPR13]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi i32 [ [[INACTIVE_VGPR14]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[INACTIVE_VGPR15]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi i32 [ [[INACTIVE_VGPR16]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi i32 [ [[INACTIVE_VGPR17]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i32 [ [[INACTIVE_VGPR18]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi i32 [ [[INACTIVE_VGPR19]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP35:%.*]] = phi i32 [ [[INACTIVE_VGPR20]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP36:%.*]] = phi i32 [ [[INACTIVE_VGPR21]], [[ENTRY]] ], [ poison, [[THEN]] 
], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP37:%.*]] = phi i32 [ [[INACTIVE_VGPR22]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP38:%.*]] = phi i32 [ [[INACTIVE_VGPR23]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP39:%.*]] = phi i32 [ [[INACTIVE_VGPR24]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP40:%.*]] = phi i32 [ [[INACTIVE_VGPR25]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP41:%.*]] = phi i32 [ [[INACTIVE_VGPR26]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP42:%.*]] = phi i32 [ [[INACTIVE_VGPR27]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[INACTIVE_VGPR28]], [[ENTRY]] ], [ poison, [[THEN]] ], [ poison, [[ELSE]] ] +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP45:%.*]] = bitcast i64 [[TMP44]] to <2 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, <3 x i32> [[LOCALINVOCATIONID]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP46]], i32 [[TMP11]], 1 +; CHECK-NEXT: [[TMP48:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP47]], i32 [[TMP12]], 2 +; CHECK-NEXT: [[TMP49:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP48]], i32 [[TMP13]], 3 +; 
CHECK-NEXT: [[TMP50:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP49]], i32 [[TMP14]], 4 +; CHECK-NEXT: [[TMP51:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP50]], i32 [[TMP15]], 5 +; CHECK-NEXT: [[TMP52:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP51]], i32 [[TMP16]], 6 +; CHECK-NEXT: [[TMP53:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP52]], i32 [[TMP17]], 7 +; CHECK-NEXT: [[TMP54:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP53]], i32 [[TMP18]], 8 +; CHECK-NEXT: [[TMP55:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP54]], i32 [[TMP19]], 9 +; CHECK-NEXT: [[TMP56:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP55]], i32 [[TMP20]], 10 +; CHECK-NEXT: [[TMP57:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP56]], i32 [[TMP21]], 11 
+; CHECK-NEXT: [[TMP58:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP57]], i32 [[TMP22]], 12 +; CHECK-NEXT: [[TMP59:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP58]], i32 [[TMP23]], 13 +; CHECK-NEXT: [[TMP60:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP59]], i32 [[TMP24]], 14 +; CHECK-NEXT: [[TMP61:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP60]], i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP62:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP61]], i32 [[TMP26]], 16 +; CHECK-NEXT: [[TMP63:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP62]], i32 [[TMP27]], 17 +; CHECK-NEXT: [[TMP64:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP63]], i32 [[TMP28]], 18 +; CHECK-NEXT: [[TMP65:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP64]], i32 
[[TMP29]], 19 +; CHECK-NEXT: [[TMP66:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP65]], i32 [[TMP30]], 20 +; CHECK-NEXT: [[TMP67:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP66]], i32 [[TMP31]], 21 +; CHECK-NEXT: [[TMP68:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP67]], i32 [[TMP32]], 22 +; CHECK-NEXT: [[TMP69:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP68]], i32 [[TMP33]], 23 +; CHECK-NEXT: [[TMP70:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP69]], i32 [[TMP34]], 24 +; CHECK-NEXT: [[TMP71:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP70]], i32 [[TMP35]], 25 +; CHECK-NEXT: [[TMP72:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP71]], i32 [[TMP36]], 26 +; CHECK-NEXT: [[TMP73:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } 
[[TMP72]], i32 [[TMP37]], 27 +; CHECK-NEXT: [[TMP74:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP73]], i32 [[TMP38]], 28 +; CHECK-NEXT: [[TMP75:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP74]], i32 [[TMP39]], 29 +; CHECK-NEXT: [[TMP76:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP75]], i32 [[TMP40]], 30 +; CHECK-NEXT: [[TMP77:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP76]], i32 [[TMP41]], 31 +; CHECK-NEXT: [[TMP78:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP77]], i32 [[TMP42]], 32 +; CHECK-NEXT: [[TMP79:%.*]] = insertvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP78]], i32 [[TMP43]], 33 +; CHECK-NEXT: [[TMP80:%.*]] = extractvalue { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], 1 +; CHECK-NEXT: [[TMP81:%.*]] = and i32 [[TMP80]], 7 +; CHECK-NEXT: [[TMP82:%.*]] = icmp ne i32 [[TMP81]], 0 +; CHECK-NEXT: [[TMP83:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP82]]) +; CHECK-NEXT: [[TMP84:%.*]] = icmp eq i32 
[[TMP81]], 3 +; CHECK-NEXT: [[TMP85:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP84]]) +; CHECK-NEXT: [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0 +; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP85]], i32 [[TMP83]] +; CHECK-NEXT: [[TMP88:%.*]] = icmp eq i32 [[TMP81]], 2 +; CHECK-NEXT: [[TMP89:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP88]]) +; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i32 [[TMP89]], 0 +; CHECK-NEXT: [[TMP91:%.*]] = select i1 [[TMP90]], i32 [[TMP89]], i32 [[TMP87]] +; CHECK-NEXT: [[TMP92:%.*]] = icmp eq i32 [[TMP81]], 1 +; CHECK-NEXT: [[TMP93:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP92]]) +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne i32 [[TMP93]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[TMP93]], i32 [[TMP91]] +; CHECK-NEXT: [[TMP96:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP95]], i1 true) +; CHECK-NEXT: [[TMP97:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TMP80]], i32 [[TMP96]]) +; CHECK-NEXT: [[TMP98:%.*]] = icmp eq i32 [[TMP80]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP98]]) +; CHECK-NEXT: [[TMP100:%.*]] = icmp eq i32 [[TMP97]], 0 +; CHECK-NEXT: br i1 [[TMP100]], label [[RET_BLOCK:%.*]], label [[CHAIN_BLOCK:%.*]] ; CHECK: chain.block: -; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], -64 -; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP42]], i64 0 -; CHECK-NEXT: [[TMP44:%.*]] = bitcast <2 x i32> [[TMP43]] to i64 -; CHECK-NEXT: [[TMP45:%.*]] = inttoptr i64 [[TMP44]] to ptr -; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 -; CHECK-NEXT: [[TMP47:%.*]] = bitcast i64 [[TMP46]] to <2 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP47]], i64 0 -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[TMP47]], i64 1 -; CHECK-NEXT: [[TMP50:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 -; CHECK-NEXT: [[TMP51:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 -; CHECK-NEXT: 
[[TMP52:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 -; CHECK-NEXT: [[TMP53:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <20 x i32> [[TMP53]], i32 [[TMP48]], i64 1 -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <20 x i32> [[TMP54]], i32 [[TMP49]], i64 2 -; CHECK-NEXT: [[TMP56:%.*]] = insertelement <20 x i32> [[TMP55]], i32 [[PAD0]], i64 3 -; CHECK-NEXT: [[TMP57:%.*]] = insertelement <20 x i32> [[TMP56]], i32 [[PAD1]], i64 4 -; CHECK-NEXT: [[TMP58:%.*]] = insertelement <20 x i32> [[TMP57]], i32 [[PAD2]], i64 5 -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <20 x i32> [[TMP58]], i32 [[PAD3]], i64 6 -; CHECK-NEXT: [[TMP60:%.*]] = insertelement <20 x i32> [[TMP59]], i32 [[PAD4]], i64 7 -; CHECK-NEXT: [[TMP61:%.*]] = insertelement <20 x i32> [[TMP60]], i32 [[PAD5]], i64 8 -; CHECK-NEXT: [[TMP62:%.*]] = insertelement <20 x i32> [[TMP61]], i32 [[PAD6]], i64 9 -; CHECK-NEXT: [[TMP63:%.*]] = insertelement <20 x i32> [[TMP62]], i32 [[PAD7]], i64 10 -; CHECK-NEXT: [[TMP64:%.*]] = insertelement <20 x i32> [[TMP63]], i32 [[PAD8]], i64 11 -; CHECK-NEXT: [[TMP65:%.*]] = insertelement <20 x i32> [[TMP64]], i32 [[PAD9]], i64 12 -; CHECK-NEXT: [[TMP66:%.*]] = insertelement <20 x i32> [[TMP65]], i32 [[PAD10]], i64 13 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <20 x i32> [[TMP66]], i32 [[PAD11]], i64 14 -; CHECK-NEXT: [[TMP68:%.*]] = insertelement <20 x i32> [[TMP67]], i32 [[SPILLTABLE]], i64 15 -; CHECK-NEXT: [[TMP69:%.*]] = insertelement <20 x i32> [[TMP68]], i32 [[TMP50]], i64 16 -; CHECK-NEXT: [[TMP70:%.*]] = insertelement <20 x i32> [[TMP69]], i32 [[TMP51]], i64 17 -; CHECK-NEXT: [[TMP71:%.*]] = insertelement <20 x i32> [[TMP70]], i32 [[TMP52]], i64 18 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <20 x i32> [[TMP71]], i32 [[MULTIDISPATCHINFO]], i64 19 -; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32s(ptr inreg [[TMP45]], i32 inreg [[TMP40]], <20 x i32> inreg [[TMP72]], { <3 x i32>, i32, i32, i32, i32, i32 } [[TMP17]], i32 0) +; CHECK-NEXT: [[TMP101:%.*]] = and i32 [[TMP97]], -64 +; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i32> [[TMP45]], i32 [[TMP101]], i64 0 +; CHECK-NEXT: [[TMP103:%.*]] = bitcast <2 x i32> [[TMP102]] to i64 +; CHECK-NEXT: [[TMP104:%.*]] = inttoptr i64 [[TMP103]] to ptr +; CHECK-NEXT: [[TMP105:%.*]] = ptrtoint ptr addrspace(4) [[NUMWORKGROUPSPTR]] to i64 +; CHECK-NEXT: [[TMP106:%.*]] = bitcast i64 [[TMP105]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i32> [[TMP106]], i64 0 +; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i32> [[TMP106]], i64 1 +; CHECK-NEXT: [[TMP109:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 0 +; CHECK-NEXT: [[TMP110:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 1 +; CHECK-NEXT: [[TMP111:%.*]] = extractelement <3 x i32> [[WORKGROUPID]], i64 2 +; CHECK-NEXT: [[TMP112:%.*]] = insertelement <20 x i32> poison, i32 [[GLOBALTABLE]], i64 0 +; CHECK-NEXT: [[TMP113:%.*]] = insertelement <20 x i32> [[TMP112]], i32 [[TMP107]], i64 1 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <20 x i32> [[TMP113]], i32 [[TMP108]], i64 2 +; CHECK-NEXT: [[TMP115:%.*]] = insertelement <20 x i32> [[TMP114]], i32 [[PAD0]], i64 3 +; CHECK-NEXT: [[TMP116:%.*]] = insertelement <20 x i32> [[TMP115]], i32 [[PAD1]], i64 4 +; CHECK-NEXT: [[TMP117:%.*]] = insertelement <20 x i32> [[TMP116]], i32 [[PAD2]], i64 5 +; CHECK-NEXT: [[TMP118:%.*]] = insertelement <20 x i32> [[TMP117]], i32 [[PAD3]], i64 6 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <20 x i32> [[TMP118]], i32 [[PAD4]], i64 7 +; CHECK-NEXT: [[TMP120:%.*]] = insertelement <20 x i32> [[TMP119]], i32 [[PAD5]], i64 8 +; CHECK-NEXT: [[TMP121:%.*]] = insertelement <20 x i32> [[TMP120]], i32 [[PAD6]], i64 9 +; CHECK-NEXT: [[TMP122:%.*]] = insertelement <20 x i32> [[TMP121]], i32 [[PAD7]], i64 10 +; 
CHECK-NEXT: [[TMP123:%.*]] = insertelement <20 x i32> [[TMP122]], i32 [[PAD8]], i64 11 +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <20 x i32> [[TMP123]], i32 [[PAD9]], i64 12 +; CHECK-NEXT: [[TMP125:%.*]] = insertelement <20 x i32> [[TMP124]], i32 [[PAD10]], i64 13 +; CHECK-NEXT: [[TMP126:%.*]] = insertelement <20 x i32> [[TMP125]], i32 [[PAD11]], i64 14 +; CHECK-NEXT: [[TMP127:%.*]] = insertelement <20 x i32> [[TMP126]], i32 [[SPILLTABLE]], i64 15 +; CHECK-NEXT: [[TMP128:%.*]] = insertelement <20 x i32> [[TMP127]], i32 [[TMP109]], i64 16 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <20 x i32> [[TMP128]], i32 [[TMP110]], i64 17 +; CHECK-NEXT: [[TMP130:%.*]] = insertelement <20 x i32> [[TMP129]], i32 [[TMP111]], i64 18 +; CHECK-NEXT: [[TMP131:%.*]] = insertelement <20 x i32> [[TMP130]], i32 [[MULTIDISPATCHINFO]], i64 19 +; CHECK-NEXT: call void (ptr, i32, <20 x i32>, { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) 
@llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_v3i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32i32s(ptr inreg [[TMP104]], i32 inreg [[TMP99]], <20 x i32> inreg [[TMP131]], { <3 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } [[TMP79]], i32 0) ; CHECK-NEXT: unreachable ; CHECK: ret.block: ; CHECK-NEXT: ret void @@ -229,7 +375,9 @@ else: ; preds = %entry attributes #0 = { noreturn } !continuation.stackAddrspace = !{!0} +!lgc.cps.maxArgumentVgprs = !{!3} !0 = !{i32 5} !1 = !{i32 1} !2 = !{i32 7} +!3 = !{i32 32} diff --git a/lgc/test/Transforms/CpsLowering/missing-max-argument-vgprs.lgc b/lgc/test/Transforms/CpsLowering/missing-max-argument-vgprs.lgc new file mode 100644 index 0000000000..1d2e4f9bbb --- /dev/null +++ b/lgc/test/Transforms/CpsLowering/missing-max-argument-vgprs.lgc @@ -0,0 +1,68 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; RUN: not --crash lgc -mcpu=gfx1030 -o - -lgc-use-init-whole-wave /dev/null -passes="require,lgc-mutate-entry-point" %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +; REQUIRES: llvm-main-revision-ge-511860 + +; CHECK: Missing lgc.cps.maxArgumentVgprs metadata + +declare void @lgc.cps.jump(...) #0 + +define void @test(i32 %cspInit, i32 %arg, ptr %table, i32 %rcr) !lgc.cps !1 !lgc.shaderstage !2 !continuation !3 !continuation.state !4 { +entry: + %csp = alloca i32, align 4 + %local = alloca i32, align 4, addrspace(5) + store i32 %cspInit, ptr %csp, align 4 + %table.0 = getelementptr i32, ptr %table, i32 0 + %cr.then = load i32, ptr %table.0, align 4 + %then.arg = add i32 %arg, 1 + %0 = load i32, ptr %csp, align 4 + store i32 %then.arg, ptr addrspace(5) %local + call void (...) 
@lgc.cps.jump(i32 %cr.then, i32 2, i32 %0, i32 %rcr, i32 %then.arg) + unreachable +} + +declare !continuation !3 { ptr, ptr } @continuation.prototype.test(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #1 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #1 + +attributes #0 = { noreturn } +attributes #1 = { nounwind } + +!continuation.stackAddrspace = !{!0} + +!0 = !{i32 5} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{ptr @test} +!4 = !{i32 0} diff --git a/lgc/test/Transforms/LowerBufferOperations/InvariantStartUserWithPhiNode.lgc b/lgc/test/Transforms/LowerBufferOperations/InvariantStartUserWithPhiNode.lgc index 3ea4645cdb..c4cf8fe118 100644 --- a/lgc/test/Transforms/LowerBufferOperations/InvariantStartUserWithPhiNode.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/InvariantStartUserWithPhiNode.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes="require,function(lgc-lower-buffer-operations)" %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/LowerBufferOperations/buffer-index-op.lgc b/lgc/test/Transforms/LowerBufferOperations/buffer-index-op.lgc index 7e25b78036..9f469a7b57 100644 --- a/lgc/test/Transforms/LowerBufferOperations/buffer-index-op.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/buffer-index-op.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc --mcpu=gfx1100 -o - -passes='require,function(lgc-structurize-buffers)' %s | FileCheck --check-prefixes=GFX11 %s diff --git a/lgc/test/Transforms/LowerBufferOperations/buffer.atomic.ops.lgc b/lgc/test/Transforms/LowerBufferOperations/buffer.atomic.ops.lgc index 8225ca73a2..6e5bd717ca 100644 --- a/lgc/test/Transforms/LowerBufferOperations/buffer.atomic.ops.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/buffer.atomic.ops.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes='require,function(lgc-lower-buffer-operations)' %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/LowerBufferOperations/simple.lgc b/lgc/test/Transforms/LowerBufferOperations/simple.lgc index dd8d434598..72ebccdcae 100644 --- a/lgc/test/Transforms/LowerBufferOperations/simple.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/simple.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes='require,function(lgc-lower-buffer-operations)' %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/LowerBufferOperations/strided-buffer-ops.lgc b/lgc/test/Transforms/LowerBufferOperations/strided-buffer-ops.lgc index b4abb90966..2201820b38 100644 --- a/lgc/test/Transforms/LowerBufferOperations/strided-buffer-ops.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/strided-buffer-ops.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc --mcpu=gfx1100 -o - -passes="require,module(lgc-lower-desc),module(lgc-mutate-entry-point),function(lgc-lower-buffer-operations)" %s | FileCheck --check-prefixes=GFX11 %s @@ -293,7 +319,7 @@ entry: %145 = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %145) %147 = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %145, i32 24) - %res = load float, ptr addrspace(9) %147, align 16 + %res = load float, ptr addrspace(9) %147, align 4 store float %res, ptr %out, align 4 ret void } @@ -323,7 +349,7 @@ define amdgpu_kernel void @strided_buffer_uniform_strided_load(<4 x i32> %desc, ; GFX11-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 16383 ; GFX11-NEXT: [[TMP19:%.*]] = mul i32 24, [[TMP18]] ; GFX11-NEXT: [[TMP20:%.*]] = add i32 0, [[TMP19]] -; GFX11-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP15]], i32 %20, i32 0), !invariant.load [[META8:![0-9]+]] +; GFX11-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP15]], i32 [[TMP20]], i32 0), !invariant.load [[META8]] ; GFX11-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float ; GFX11-NEXT: store float [[TMP22]], ptr [[OUT]], align 4 ; GFX11-NEXT: ret void @@ -332,11 +358,45 @@ entry: %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr 
addrspace(9) %buf) %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) - %res = load float, ptr addrspace(9) %buf.idx, align 16 + %res = load float, ptr addrspace(9) %buf.idx, align 4 store float %res, ptr %out, align 4 ret void } +define amdgpu_kernel void @strided_buffer_uniform_strided_load_f16(<4 x i32> %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX11-LABEL: define amdgpu_gfx void @strided_buffer_uniform_strided_load_f16 +; GFX11-SAME: (<4 x i32> [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR1]] !lgc.shaderstage [[META7]] { +; GFX11-NEXT: entry: +; GFX11-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX11-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX11-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX11-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX11-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX11-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA4]], i64 0 +; GFX11-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA5]], i64 1 +; GFX11-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX11-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; GFX11-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; 
GFX11-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; GFX11-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX11-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX11-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX11-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX11-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 805392300, i64 3 +; GFX11-NEXT: [[TMP16:%.*]] = call i16 @llvm.amdgcn.struct.buffer.load.i16{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 0, i32 0, i32 0) +; GFX11-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP16]] to half +; GFX11-NEXT: store half [[TMP17]], ptr [[OUT]], align 2 +; GFX11-NEXT: ret void +; +entry: + %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) + %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) + %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) + %res = load half, ptr addrspace(9) %buf.idx, align 2 + store half %res, ptr %out, align 2 + ret void +} + define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { ; GFX11-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load ; GFX11-SAME: (<4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef 
[[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR1]] !lgc.shaderstage [[META7]] { @@ -357,7 +417,7 @@ define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load(<4 x i32> ; GFX11-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 ; GFX11-NEXT: [[TMP14:%.*]] = or i32 [[TMP13]], 268435456 ; GFX11-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 -; GFX11-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[DESC]], i32 add (i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 288), i32 0), !invariant.load [[META8:![0-9]+]] +; GFX11-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[DESC]], i32 add (i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 288), i32 0), !invariant.load [[META8]] ; GFX11-NEXT: [[TMP17:%.*]] = bitcast i32 [[TMP16]] to float ; GFX11-NEXT: store float [[TMP17]], ptr [[OUT]], align 4 ; GFX11-NEXT: ret void @@ -373,6 +433,42 @@ entry: ret void } +define amdgpu_kernel void @strided_buffer_convert_uniform_strided_load_f16(<4 x i32> inreg %desc, ptr %out) #0 !lgc.shaderstage !4 { +; GFX11-LABEL: define amdgpu_gfx void @strided_buffer_convert_uniform_strided_load_f16 +; GFX11-SAME: (<4 x i32> inreg [[DESC:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef 
[[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR1]] !lgc.shaderstage [[META7]] { +; GFX11-NEXT: entry: +; GFX11-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; GFX11-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; GFX11-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SPILLTABLE]], i64 0 +; GFX11-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; GFX11-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; GFX11-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[DESC]], i64 1 +; GFX11-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], -1073676289 +; GFX11-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 786432 +; GFX11-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[DESC]], i32 [[TMP7]], i64 1 +; GFX11-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i64 2 +; GFX11-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 12 +; GFX11-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 2 +; GFX11-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i64 3 +; GFX11-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], -805306369 +; GFX11-NEXT: [[TMP14:%.*]] = or i32 [[TMP13]], 268435456 +; GFX11-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 3 +; GFX11-NEXT: [[TMP16:%.*]] = call i16 @llvm.amdgcn.struct.buffer.load.i16{{(\.v4i32)?}}(<4 x i32> [[TMP15]], i32 24, i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 0, i32 0) +; GFX11-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP16]] to half +; GFX11-NEXT: store half [[TMP17]], ptr [[OUT]], align 2 +; GFX11-NEXT: ret void +; +entry: + %ptr = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %desc, i1 false) + %146 = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) %ptr) + %buf.off = getelementptr inbounds i8, ptr addrspace(7) %ptr, i32 8 + %buf.cnv = call ptr addrspace(9) @lgc.convert.to.strided.buffer.pointer(ptr addrspace(7) %buf.off, i32 12) + 
%buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf.cnv, i32 24) + %res = load half, ptr addrspace(9) %buf.idx, align 2 + store half %res, ptr %out, align 2 + ret void +} + define amdgpu_kernel void @strided_buffer_divergent_idx_strided_load(<4 x i32> %desc, i32 %index, ptr %out) #0 !lgc.shaderstage !4 { ; GFX11-LABEL: define amdgpu_gfx void @strided_buffer_divergent_idx_strided_load ; GFX11-SAME: (<4 x i32> [[DESC:%.*]], i32 [[INDEX:%.*]], ptr [[OUT:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR1]] !lgc.shaderstage [[META7]] { @@ -402,7 +498,7 @@ entry: %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) %146 = call ptr @llvm.invariant.start.p9(i64 -1, ptr addrspace(9) %buf) %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 %index) - %res = load float, ptr addrspace(9) %buf.idx, align 16 + %res = load float, ptr addrspace(9) %buf.idx, align 4 store float %res, ptr %out, align 4 ret void } @@ -471,7 +567,7 @@ define amdgpu_kernel void @strided_buffer_divergent_ptr_strided_load(<4 x i32> % entry: %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 24) - %res = load float, ptr addrspace(9) 
%buf.idx, align 16 + %res = load float, ptr addrspace(9) %buf.idx, align 4 store float %res, ptr %out, align 4 ret void } @@ -552,7 +648,7 @@ define amdgpu_kernel void @strided_buffer_divergent_strided_load(<4 x i32> %desc entry: %buf = call ptr addrspace(9) @lgc.load.strided.buffer.desc(i64 8589934592, i32 0, i32 0, i32 4, i32 16) %buf.idx = call ptr addrspace(9) @lgc.strided.index.add(ptr addrspace(9) %buf, i32 %index) - %res = load float, ptr addrspace(9) %buf.idx, align 16 + %res = load float, ptr addrspace(9) %buf.idx, align 4 store float %res, ptr %out, align 4 ret void } diff --git a/lgc/test/Transforms/LowerBufferOperations/uniform-phi.lgc b/lgc/test/Transforms/LowerBufferOperations/uniform-phi.lgc index f9838fe387..8cc31f432b 100644 --- a/lgc/test/Transforms/LowerBufferOperations/uniform-phi.lgc +++ b/lgc/test/Transforms/LowerBufferOperations/uniform-phi.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes='require,function(lgc-lower-buffer-operations)' %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc index 05b2f71539..d2336054de 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s @@ -8,13 +34,13 @@ define <8 x float> @convert_f16_to_accumulator(<8 x float> %fact) { ; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x float> [[FACT:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP4]], <8 x i32> zeroinitializer, <8 x i32> {{(splat \(i32 16\))|()}} +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP4]], <8 x i32> zeroinitializer, <8 x i32> splat (i32 16) ; CHECK-NEXT: [[ACCUM1:%.*]] = lshr <8 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[ACCUM2:%.*]] = bitcast <8 x i32> [[ACCUM1]] to <8 x float> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[ACCUM2]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP7]] ; - %accum = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %fact, i32 1, i32 1, i32 0, i32 1) + %accum = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %fact, i32 1, i32 1, i32 0, i32 1) ret <8 x float> %accum } @@ -60,13 +86,13 @@ define <8 x float> @convert_f16_to_factor(<8 x float> %accum) { ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <8 x i32> [[TMP37]], i32 [[TMP30]], i64 7 ; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = select i1 [[TMP5]], <8 x i32> [[TMP38]], <8 x i32> [[TMP6]] -; CHECK-NEXT: [[TMP41:%.*]] = and <8 x i32> [[TMP39]], {{(splat \(i32 65535\))|()}} -; CHECK-NEXT: [[TMP42:%.*]] = shl <8 x i32> [[TMP40]], {{(splat \(i32 16\))|()}} +; CHECK-NEXT: [[TMP41:%.*]] = and <8 x i32> [[TMP39]], splat (i32 65535) +; CHECK-NEXT: [[TMP42:%.*]] = shl <8 x i32> [[TMP40]], splat (i32 16) ; CHECK-NEXT: [[TMP43:%.*]] = or <8 x i32> [[TMP41]], [[TMP42]] ; CHECK-NEXT: [[TMP44:%.*]] = bitcast <8 x i32> [[TMP43]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[TMP44]] ; - %fact = call <8 x float> (...) @lgc.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum, i32 1, i32 1, i32 1, i32 0) + %fact = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 0, <8 x float> %accum, i32 1, i32 1, i32 1, i32 0) ret <8 x float> %fact } @@ -82,7 +108,7 @@ define <8 x i32> @convert_f16_to_bf16(<8 x float> %acc) { ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i16> [[TMP6]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP7]] ; - %fConvert = call <8 x i32> (...) @lgc.cooperative.matrix.convert__v8i32(i32 45, <8 x float> %acc, i32 1, i32 7, i32 1, i32 1) + %fConvert = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v8i32(i32 45, <8 x float> %acc, i32 1, i32 7, i32 1, i32 1) ret <8 x i32> %fConvert } @@ -149,7 +175,7 @@ define <8 x float> @convert_u4_to_f16(<2 x i32> %load) { ; CHECK-NEXT: [[TMP59:%.*]] = bitcast <16 x half> [[TMP58]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[TMP59]] ; - %convert = call <8 x float> (...) 
@lgc.cooperative.matrix.convert__v8f32(i32 43, <2 x i32> %load, i32 10, i32 1, i32 0, i32 0) + %convert = call <8 x float> (...) @lgc.xdl.cooperative.matrix.convert__v8f32(i32 43, <2 x i32> %load, i32 10, i32 1, i32 0, i32 0) ret <8 x float> %convert } @@ -208,7 +234,7 @@ define <2 x i32> @convert_u8_to_u4(<4 x i32> %load) { ; CHECK-NEXT: [[TMP51:%.*]] = bitcast <8 x i8> [[TMP50]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP51]] ; - %u4Convert = call <2 x i32> (...) @lgc.cooperative.matrix.convert__v2i32(i32 38, <4 x i32> %load, i32 3, i32 10, i32 0, i32 0) + %u4Convert = call <2 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v2i32(i32 38, <4 x i32> %load, i32 3, i32 10, i32 0, i32 0) ret <2 x i32> %u4Convert } @@ -269,17 +295,17 @@ define <2 x i32> @convert_fp16_to_i4(<8 x i32> %load) { ; CHECK-NEXT: [[TMP52:%.*]] = bitcast <8 x i8> [[TMP51]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP52]] ; - %u4Convert = call <2 x i32> (...) @lgc.cooperative.matrix.convert__v2i32(i32 42, <8 x i32> %load, i32 1, i32 10, i32 0, i32 0) + %u4Convert = call <2 x i32> (...) @lgc.xdl.cooperative.matrix.convert__v2i32(i32 42, <8 x i32> %load, i32 1, i32 10, i32 0, i32 0) ret <2 x i32> %u4Convert } declare i1 @getcc() declare <8 x float> @process1(<8 x float>) -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.transpose__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.convert__v8f32(...) -declare <8 x i32> @lgc.cooperative.matrix.convert__v8i32(...) -declare <8 x float> @lgc.cooperative.matrix.convert_v8i32(...) -declare <2 x i32> @lgc.cooperative.matrix.convert__v2i32(...) -declare void @lgc.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.transpose__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.convert__v8f32(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.convert__v8i32(...) 
+declare <8 x float> @lgc.xdl.cooperative.matrix.convert_v8i32(...) +declare <2 x i32> @lgc.xdl.cooperative.matrix.convert__v2i32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/extract-insert.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/extract-insert.lgc index 626bb2ab54..5329cf06a3 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/extract-insert.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/extract-insert.lgc @@ -1,12 +1,38 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s define i32 @test_length_f16() !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: @test_length_f16( -; CHECK-NEXT: [[A:%.*]] = call i32 (...) @lgc.cooperative.matrix.length__i32(i32 1, i32 0) +; CHECK-NEXT: [[A:%.*]] = call i32 (...) @lgc.xdl.cooperative.matrix.length__i32(i32 1, i32 0) ; CHECK-NEXT: ret i32 [[A]] ; - %a = call i32 (...) @lgc.cooperative.matrix.length__i32(i32 1, i32 0) + %a = call i32 (...) @lgc.xdl.cooperative.matrix.length__i32(i32 1, i32 0) ret i32 %a } @@ -16,7 +42,7 @@ define half @test_extract_f16(<8 x float> %matrix) !spirv.ExecutionModel !8 !lgc ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> [[TMP1]], i32 5 ; CHECK-NEXT: ret half [[TMP2]] ; - %r = call half (...) @lgc.cooperative.matrix.extract__f16(<8 x float> %matrix, i32 5, i32 1, i32 0) + %r = call half (...) @lgc.xdl.cooperative.matrix.extract__f16(<8 x float> %matrix, i32 5, i32 1, i32 0) ret half %r } @@ -27,13 +53,13 @@ define <8 x float> @test_insert_f16(<8 x float> %matrix, half %x) !spirv.Executi ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x half> [[TMP2]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[TMP3]] ; - %r = call <8 x float> (...) @lgc.cooperative.matrix.insert__v8f32(<8 x float> %matrix, half %x, i32 5, i32 1, i32 0) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.insert__v8f32(<8 x float> %matrix, half %x, i32 5, i32 1, i32 0) ret <8 x float> %r } -declare i32 @lgc.cooperative.matrix.length__i32(...) -declare half @lgc.cooperative.matrix.extract__f16(...) -declare <8 x float> @lgc.cooperative.matrix.insert__v8f32(...) +declare i32 @lgc.xdl.cooperative.matrix.length__i32(...) 
+declare half @lgc.xdl.cooperative.matrix.extract__f16(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.insert__v8f32(...) !llpc.compute.mode = !{!0} !lgc.client = !{!1} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc index 6aad0b526f..f6f215088b 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc @@ -1,32 +1,59 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Do not autogenerate ; RUN: lgc -march=amdgcn -o - --mcpu=gfx1010 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s define void @matmul_f16f32_emulator(ptr addrspace(3) %out0, <8 x float> %a, <8 x float> %b, <8 x float> %c0) !lgc.shaderstage !0 { - %value = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 2, i32 2, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value, i32 16) + ; CHECK-NOT: v_dot + %value = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 2, i32 2, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value, i32 16) ret void } define void @matmul_i16i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { - %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 4, i32 4, i32 5, i32 5, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) + ; CHECK-NOT: v_dot + %value = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 4, i32 4, i32 5, i32 5, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) ret void } define void @matmul_i8i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { - %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 3, i32 3, i32 5, i32 5, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) + ; CHECK-NOT: v_dot + %value = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 3, i32 3, i32 5, i32 5, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) ret void } -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f8(...) -declare <8 x i32> @lgc.cooperative.matrix.muladd__v8i32(...) -declare void @lgc.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f8(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.muladd__v8i32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) ; ShaderStage::Compute !0 = !{i32 7} ; Setting Threadgroup Dimensions to 64 x 1 x 1 !llpc.compute.mode = !{!1} !1 = !{i32 64, i32 1, i32 1} -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc index 6be3cb0a23..8ee08ceee9 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc @@ -1,33 +1,59 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; NOTE: Do not autogenerate ; RUN: lgc -march=amdgcn -o - --mcpu=gfx1011 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s define void @matmul_f16f32_emulator(ptr addrspace(3) %out0, <8 x float> %a, <8 x float> %b, <8 x float> %c0) !lgc.shaderstage !0 { - %value = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 2, i32 2, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value, i32 16) + ; CHECK: v_dot2c_f32_f16 + %value = call <8 x float> (...) @lgc.xdl.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 1, i32 1, i32 2, i32 2, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value, i32 16) ret void } define void @matmul_i16i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { - %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 4, i32 4, i32 5, i32 5, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) + ; CHECK: v_dot2c_f32_f16 + %value = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 4, i32 4, i32 5, i32 5, i32 1) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) ret void } define void @matmul_i8i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { - %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 3, i32 3, i32 5, i32 5, i32 1) - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) + ; CHECK: v_dot2c_f32_f16 + %value = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 3, i32 3, i32 5, i32 5, i32 1) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value, i32 16) ret void } -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f8(...) -declare <8 x i32> @lgc.cooperative.matrix.muladd__v8i32(...) -declare void @lgc.cooperative.matrix.store(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f8(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.muladd__v8i32(...) +declare void @lgc.xdl.cooperative.matrix.store(...) ; ShaderStage::Compute !0 = !{i32 7} ; Setting Threadgroup Dimensions to 64 x 1 x 1 !llpc.compute.mode = !{!1} !1 = !{i32 64, i32 1, i32 1} - -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1100muladd.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1100muladd.lgc index 26b6c31996..9b0ef701d5 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1100muladd.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1100muladd.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 ; RUN: lgc -o - --mcpu=gfx1100 -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s @@ -11,7 +37,7 @@ define <8 x i32> @muladd_bf16_bf16(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[VALUE1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP4]] ; - %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false, i1 false, i1 false, i32 7, i32 7, i32 7, i32 7, i32 1) + %value = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, i1 false, i1 false, i1 false, i1 false, i32 7, i32 7, i32 7, i32 7, i32 1) ret <8 x i32> %value } @@ -23,7 +49,7 @@ define <8 x float> @muladd_bf16_f32(<8 x i32> %a, <8 x i32> %b, <8 x float> %c) ; CHECK-NEXT: [[VALUE1:%.*]] = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <8 x float> [[C]]) ; CHECK-NEXT: ret <8 x float> [[VALUE1]] ; - %value = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f32(<8 x i32> %a, <8 x i32> %b, <8 x float> %c, i1 false, i1 false, i1 false, i1 false, i32 7, i32 7, i32 2, i32 2, i32 1) + %value = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.muladd__v8f32(<8 x i32> %a, <8 x i32> %b, <8 x float> %c, i1 false, i1 false, i1 false, i1 false, i32 7, i32 7, i32 2, i32 2, i32 1) ret <8 x float> %value } @@ -33,10 +59,10 @@ define <4 x i32> @muladd_16x16x16_iu4(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) ; CHECK-NEXT: [[MULADD1:%.*]] = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 false, <2 x i32> [[A]], i1 false, <2 x i32> [[B]], <4 x i32> [[C]], i1 false) ; CHECK-NEXT: ret <4 x i32> [[MULADD1]] ; - %mulAdd = call <4 x i32> (...) @lgc.cooperative.matrix.muladd__v4i32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c, i1 false, i1 false, i1 false, i1 false, i32 10, i32 10, i32 5, i32 5, i32 1) + %mulAdd = call <4 x i32> (...) @lgc.xdl.cooperative.matrix.muladd__v4i32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c, i1 false, i1 false, i1 false, i1 false, i32 10, i32 10, i32 5, i32 5, i32 1) ret <4 x i32> %mulAdd } -declare <8 x i32> @lgc.cooperative.matrix.muladd__v8i32(...) -declare <8 x float> @lgc.cooperative.matrix.muladd__v8f32(...) -declare <4 x i32> @lgc.cooperative.matrix.muladd__v4i32(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.muladd__v8i32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.muladd__v8f32(...) +declare <4 x i32> @lgc.xdl.cooperative.matrix.muladd__v4i32(...) diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/load-wave64.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/load-wave64.lgc index bc9768d7e4..752f3bfa97 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/load-wave64.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/load-wave64.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s @@ -74,7 +100,7 @@ define <8 x float> @test_f16_ab_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMo ; CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x half> [[TMP68]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[TMP69]] ; - %a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 0, i32 0, i32 16, i32 16) + %a = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 0, i32 0, i32 16, i32 16) ret <8 x float> %a } @@ -106,7 +132,7 @@ define <8 x float> @test_f16_cd_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMo ; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x half> [[TMP23]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[TMP24]] ; - %a = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 1, i32 0, i32 16, i32 16) + %a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 1, i32 0, i32 16, i32 16) ret <8 x float> %a } @@ -183,7 +209,7 @@ define <8 x i32> @test_i16_ab_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMode ; CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i16> [[TMP68]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP69]] ; - %a = call <8 x i32> (...) @lgc.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 0, i32 0, i32 16, i32 16) + %a = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 0, i32 0, i32 16, i32 16) ret <8 x i32> %a } @@ -215,7 +241,7 @@ define <8 x i32> @test_i16_cd_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMode ; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i16> [[TMP23]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP24]] ; - %a = call <8 x i32> (...) @lgc.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 1, i32 0, i32 16, i32 16) + %a = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 1, i32 0, i32 16, i32 16) ret <8 x i32> %a } @@ -246,7 +272,7 @@ define <8 x float> @test_f32_cd_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMo ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[TMP22]], <4 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP23]] ; - %a = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 640, i1 false, i32 2, i32 1, i32 0, i32 16, i32 16) + %a = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) %ptr, i32 640, i1 false, i32 2, i32 1, i32 0, i32 16, i32 16) ret <8 x float> %a } @@ -277,7 +303,7 @@ define <8 x i32> @test_i32_cd_layout(ptr addrspace(7) %ptr) !spirv.ExecutionMode ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP23]] ; - %a = call <8 x i32> (...) @lgc.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 640, i1 false, i32 5, i32 1, i32 0, i32 16, i32 16) + %a = call <8 x i32> (...) @lgc.xdl.cooperative.matrix.load__v8i32(ptr addrspace(7) %ptr, i32 640, i1 false, i32 5, i32 1, i32 0, i32 16, i32 16) ret <8 x i32> %a } @@ -323,13 +349,13 @@ define <2 x i32> @test_i4_ab_layout(ptr addrspace(7) %ptr) !spirv.ExecutionModel ; CHECK-NEXT: [[TMP38:%.*]] = bitcast <8 x i8> [[TMP37]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[TMP38]] ; - %a = call <2 x i32> (...) @lgc.cooperative.matrix.load__v2i32(ptr addrspace(7) %ptr, i32 128, i1 true, i32 10, i32 0, i32 2, i32 16, i32 16) + %a = call <2 x i32> (...) @lgc.xdl.cooperative.matrix.load__v2i32(ptr addrspace(7) %ptr, i32 128, i1 true, i32 10, i32 0, i32 2, i32 16, i32 16) ret <2 x i32> %a } -declare <8 x float> @lgc.cooperative.matrix.load__v8f32(...) -declare <8 x i32> @lgc.cooperative.matrix.load__v8i32(...) -declare <2 x i32> @lgc.cooperative.matrix.load__v2i32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.load__v8f32(...) +declare <8 x i32> @lgc.xdl.cooperative.matrix.load__v8i32(...) +declare <2 x i32> @lgc.xdl.cooperative.matrix.load__v2i32(...) 
!llpc.compute.mode = !{!0} !lgc.client = !{!1} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/packed-accumulators-wave64.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/packed-accumulators-wave64.lgc index 36f6c26b56..f2d295e50c 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/packed-accumulators-wave64.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/packed-accumulators-wave64.lgc @@ -1,9 +1,35 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc --mcpu=gfx1100 -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s define <8 x float> @test_pack_f16(<8 x float> %a, <8 x float> %b) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define <8 x float> @test_pack_f16 -; CHECK-SAME: (<8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) !spirv.ExecutionModel [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] { +; CHECK-SAME: (<8 x float> [[A:%.*]], <8 x float> [[B:%.*]]) !spirv.ExecutionModel [[META6:![0-9]+]] !lgc.shaderstage [[META7:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float> [[A]] to <16 x half> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[B]] to <16 x half> @@ -12,13 +38,13 @@ define <8 x float> @test_pack_f16(<8 x float> %a, <8 x float> %b) !spirv.Executi ; CHECK-NEXT: ret <8 x float> [[TMP3]] ; entry: - %r = call <8 x float > (...) @lgc.cooperative.matrix.pack__v8f32(<8 x float> %a, <8 x float> %b) + %r = call <8 x float > (...) 
@lgc.xdl.cooperative.matrix.pack__v8f32(<8 x float> %a, <8 x float> %b) ret <8 x float> %r } define <8 x float> @test_unpack_lo(<8 x float> %packed) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define <8 x float> @test_unpack_lo -; CHECK-SAME: (<8 x float> [[PACKED:%.*]]) !spirv.ExecutionModel [[META5]] !lgc.shaderstage [[META6]] { +; CHECK-SAME: (<8 x float> [[PACKED:%.*]]) !spirv.ExecutionModel [[META6]] !lgc.shaderstage [[META7]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float> [[PACKED]] to <16 x half> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <16 x i32> @@ -26,13 +52,13 @@ define <8 x float> @test_unpack_lo(<8 x float> %packed) !spirv.ExecutionModel !8 ; CHECK-NEXT: ret <8 x float> [[TMP2]] ; entry: - %r = call <8 x float > (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> %packed, i1 false) + %r = call <8 x float > (...) @lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> %packed, i1 false) ret <8 x float> %r } define <8 x float> @test_unpack_hi(<8 x float> %packed) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define <8 x float> @test_unpack_hi -; CHECK-SAME: (<8 x float> [[PACKED:%.*]]) !spirv.ExecutionModel [[META5]] !lgc.shaderstage [[META6]] { +; CHECK-SAME: (<8 x float> [[PACKED:%.*]]) !spirv.ExecutionModel [[META6]] !lgc.shaderstage [[META7]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float> [[PACKED]] to <16 x half> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <16 x i32> @@ -40,13 +66,13 @@ define <8 x float> @test_unpack_hi(<8 x float> %packed) !spirv.ExecutionModel !8 ; CHECK-NEXT: ret <8 x float> [[TMP2]] ; entry: - %r = call <8 x float > (...) @lgc.cooperative.matrix.unpack__v8f32(<8 x float> %packed, i1 true) + %r = call <8 x float > (...) 
@lgc.xdl.cooperative.matrix.unpack__v8f32(<8 x float> %packed, i1 true) ret <8 x float> %r } define <8 x float> @test_packed_times_scalar(<8 x float> %packed, <2 x half> %scalar) !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define <8 x float> @test_packed_times_scalar -; CHECK-SAME: (<8 x float> [[PACKED:%.*]], <2 x half> [[SCALAR:%.*]]) !spirv.ExecutionModel [[META5]] !lgc.shaderstage [[META6]] { +; CHECK-SAME: (<8 x float> [[PACKED:%.*]], <2 x half> [[SCALAR:%.*]]) !spirv.ExecutionModel [[META6]] !lgc.shaderstage [[META7]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x float> [[PACKED]] to <16 x half> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x half> [[TMP0]], <16 x half> poison, <8 x i32> @@ -57,13 +83,13 @@ define <8 x float> @test_packed_times_scalar(<8 x float> %packed, <2 x half> %sc ; CHECK-NEXT: ret <8 x float> [[TMP5]] ; entry: - %r = call <8 x float> (...) @lgc.cooperative.matrix.times.scalar__v8f32(<8 x float> %packed, <2 x half> %scalar, i32 6, i32 1) + %r = call <8 x float> (...) @lgc.xdl.cooperative.matrix.times.scalar__v8f32(<8 x float> %packed, <2 x half> %scalar, i32 6, i32 1) ret <8 x float> %r } -declare <8 x float> @lgc.cooperative.matrix.pack__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.unpack__v8f32(...) -declare <8 x float> @lgc.cooperative.matrix.times.scalar__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.pack__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.unpack__v8f32(...) +declare <8 x float> @lgc.xdl.cooperative.matrix.times.scalar__v8f32(...) 
!llpc.compute.mode = !{!0} !lgc.client = !{!1} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/store-wave64.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/store-wave64.lgc index f3b781b646..73dd6a78f6 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/store-wave64.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/store-wave64.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes=lgc-lower-cooperative-matrix %s | FileCheck --check-prefixes=CHECK %s @@ -74,7 +100,7 @@ define void @test_f16_ab_layout(ptr addrspace(7) %ptr, <8 x float> %a) !spirv.Ex ; CHECK-NEXT: store half [[TMP53]], ptr addrspace(7) [[TMP52]], align 2 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> %a, i32 16) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 0, i32 0, i32 16, <8 x float> %a, i32 16) ret void } @@ -106,7 +132,7 @@ define void @test_f16_cd_layout(ptr addrspace(7) %ptr, <8 x float> %a) !spirv.Ex ; CHECK-NEXT: store half [[TMP20]], ptr addrspace(7) [[TMP19]], align 2 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 1, i32 0, i32 16, <8 x float> %a, i32 16) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 1, i32 1, i32 0, i32 16, <8 x float> %a, i32 16) ret void } @@ -183,7 +209,7 @@ define void @test_i16_ab_layout(ptr addrspace(7) %ptr, <8 x i32> %a) !spirv.Exec ; CHECK-NEXT: store i16 [[TMP53]], ptr addrspace(7) [[TMP52]], align 2 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 0, i32 0, i32 16, <8 x i32> %a, i32 16) + call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 0, i32 0, i32 16, <8 x i32> %a, i32 16) ret void } @@ -215,7 +241,7 @@ define void @test_i16_cd_layout(ptr addrspace(7) %ptr, <8 x i32> %a) !spirv.Exec ; CHECK-NEXT: store i16 [[TMP20]], ptr addrspace(7) [[TMP19]], align 2 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 1, i32 0, i32 16, <8 x i32> %a, i32 16) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 320, i1 false, i32 4, i32 1, i32 0, i32 16, <8 x i32> %a, i32 16) ret void } @@ -246,7 +272,7 @@ define void @test_f32_cd_layout(ptr addrspace(7) %ptr, <8 x float> %a) !spirv.Ex ; CHECK-NEXT: store float [[TMP19]], ptr addrspace(7) [[TMP18]], align 4 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 false, i32 2, i32 1, i32 0, i32 16, <8 x float> %a, i32 16) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 false, i32 2, i32 1, i32 0, i32 16, <8 x float> %a, i32 16) ret void } @@ -277,11 +303,11 @@ define void @test_i32_cd_layout(ptr addrspace(7) %ptr, <8 x i32> %a) !spirv.Exec ; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(7) [[TMP18]], align 4 ; CHECK-NEXT: ret void ; - call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 false, i32 5, i32 1, i32 0, i32 16, <8 x i32> %a, i32 16) + call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) %ptr, i32 640, i1 false, i32 5, i32 1, i32 0, i32 16, <8 x i32> %a, i32 16) ret void } -declare void @lgc.cooperative.matrix.store(...) +declare void @lgc.xdl.cooperative.matrix.store(...) 
!llpc.compute.mode = !{!0} !lgc.client = !{!1} diff --git a/lgc/test/Transforms/LowerDebugPrintf/basic.lgc b/lgc/test/Transforms/LowerDebugPrintf/basic.lgc index 9c879a10d0..633988a3c3 100644 --- a/lgc/test/Transforms/LowerDebugPrintf/basic.lgc +++ b/lgc/test/Transforms/LowerDebugPrintf/basic.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -o - -passes="require,lgc-lower-debug-printf" %s | FileCheck --check-prefixes=IR %s ; RUN: lgc -o - -passes="require,lgc-lower-debug-printf,print" %s -o /dev/null 2>&1 | FileCheck --check-prefixes=PALMD %s diff --git a/lgc/test/Transforms/LowerGpuRt/init-static-id-op.lgc b/lgc/test/Transforms/LowerGpuRt/init-static-id-op.lgc index eeb886c9c0..ad1366e7c2 100644 --- a/lgc/test/Transforms/LowerGpuRt/init-static-id-op.lgc +++ b/lgc/test/Transforms/LowerGpuRt/init-static-id-op.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -mcpu=gfx1030 -o - -passes=lgc-lower-gpurt %s | FileCheck --check-prefixes=CHECK %s ; Test that each call site of @lgc.gpurt.init.static.id generates a unique ID. diff --git a/lgc/test/Transforms/LowerGpuRt/lower-gpurt-dialect-op.lgc b/lgc/test/Transforms/LowerGpuRt/lower-gpurt-dialect-op.lgc new file mode 100644 index 0000000000..48675880ac --- /dev/null +++ b/lgc/test/Transforms/LowerGpuRt/lower-gpurt-dialect-op.lgc @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 +; RUN: lgc -mcpu=gfx1030 -o - -passes=lgc-lower-gpurt %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Test the result of lowering GPURT dialect op calls like @lgc.gpurt.make.pc(). + +; ModuleID = 'lgcPipeline' +declare i64 @lgc.gpurt.make.pc__i32(i32) +declare <2 x i32> @lgc.gpurt.make.pc__v2i32(i32) +declare void @dummy.use__i64(i64 %val) +declare void @dummy.use__v2i32(<2 x i32> %val) + +define spir_func void @test(i32 %debug_in) { +; CHECK-LABEL: define spir_func void @test( +; CHECK-SAME: i32 [[DEBUG_IN:%.*]]) { +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP6]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP11]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[DEBUG_IN]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 +; CHECK-NEXT: call void @dummy.use__i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 32 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[DEBUG_IN]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64 [[TMP12]] to <2 x i32> +; CHECK-NEXT: call void @dummy.use__v2i32(<2 x i32> [[TMP13]]) +; CHECK-NEXT: ret void +; + %1 = call i64 @lgc.gpurt.make.pc__i32(i32 %debug_in) + call void @dummy.use__i64(i64 %1) + %2 = call <2 x i32> @lgc.gpurt.make.pc__v2i32(i32 
%debug_in) + call void @dummy.use__v2i32(<2 x i32> %2) + ret void +} diff --git a/lgc/test/Transforms/PeepholeOpt/PeepholeOptLog2PowUnderflow.lgc b/lgc/test/Transforms/PeepholeOpt/PeepholeOptLog2PowUnderflow.lgc index a95dad17f3..2b4467e87b 100644 --- a/lgc/test/Transforms/PeepholeOpt/PeepholeOptLog2PowUnderflow.lgc +++ b/lgc/test/Transforms/PeepholeOpt/PeepholeOptLog2PowUnderflow.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 ; RUN: lgc -mcpu=gfx1010 -passes='require,function(lgc-peephole-optimization)' -o - %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc b/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc index 8e899c9b44..db72b7933d 100644 --- a/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc +++ b/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -mcpu=gfx1010 -passes=lgc-lower-read-first-lane %s | FileCheck %s diff --git a/lgc/test/Transforms/ReadFirstLane/issue2746.lgc b/lgc/test/Transforms/ReadFirstLane/issue2746.lgc index 38f1d06146..ee81f23501 100644 --- a/lgc/test/Transforms/ReadFirstLane/issue2746.lgc +++ b/lgc/test/Transforms/ReadFirstLane/issue2746.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 3 ; RUN: lgc -o - -passes=lgc-lower-read-first-lane %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/Transforms/ReadFirstLane/simple.lgc b/lgc/test/Transforms/ReadFirstLane/simple.lgc index c6e215fcfa..645df076a3 100644 --- a/lgc/test/Transforms/ReadFirstLane/simple.lgc +++ b/lgc/test/Transforms/ReadFirstLane/simple.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - -passes=lgc-lower-read-first-lane %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/UberFetchShader.lgc b/lgc/test/UberFetchShader.lgc index 79945aacbd..30d923bb8c 100644 --- a/lgc/test/UberFetchShader.lgc +++ b/lgc/test/UberFetchShader.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -mcpu=gfx1030 -o - -passes=lgc-vertex-fetch %s | FileCheck --check-prefixes=CHECK %s ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main() diff --git a/lgc/test/WorkgroupIdOpt.lgc b/lgc/test/WorkgroupIdOpt.lgc index f9959f7209..e30345eb74 100644 --- a/lgc/test/WorkgroupIdOpt.lgc +++ b/lgc/test/WorkgroupIdOpt.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 5 ; RUN: lgc -mcpu=gfx1100 -passes=lgc-mutate-entry-point -o - %s | FileCheck --check-prefixes=CHECK %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9-p32:32:32" diff --git a/lgc/test/lgc-tool.lgc b/lgc/test/lgc-tool.lgc index 6a0228f983..056d74539e 100644 --- a/lgc/test/lgc-tool.lgc +++ b/lgc/test/lgc-tool.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Simple test that checks that the lgc command works with multiple modules in ; the same input file. ; diff --git a/lgc/test/lgcdis-longjump.lgc b/lgc/test/lgcdis-longjump.lgc index f0060cee10..fe0d1c6d68 100644 --- a/lgc/test/lgcdis-longjump.lgc +++ b/lgc/test/lgcdis-longjump.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: llvm-mc -triple=amdgcn--amdpal -mcpu=gfx1030 -filetype=obj %s | \ ; RUN: lgcdis - | FileCheck %s diff --git a/lgc/test/lgcdis-table_jump.lgc b/lgc/test/lgcdis-table_jump.lgc index 840ce9dfd7..2921728e8e 100644 --- a/lgc/test/lgcdis-table_jump.lgc +++ b/lgc/test/lgcdis-table_jump.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: llvm-mc -triple=amdgcn--amdpal -mcpu=gfx1030 -filetype=obj %s | \ ; RUN: lgcdis - | FileCheck %s diff --git a/lgc/test/lgcdis.lgc b/lgc/test/lgcdis.lgc index 30ee123612..18eaa84a34 100644 --- a/lgc/test/lgcdis.lgc +++ b/lgc/test/lgcdis.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: lgc -mcpu=gfx1030 -o %t %s ; RUN: lgcdis %t | FileCheck %s diff --git a/lgc/test/lit.cfg.py b/lgc/test/lit.cfg.py index ef5bf1faa5..ab6e39ad44 100644 --- a/lgc/test/lit.cfg.py +++ b/lgc/test/lit.cfg.py @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + # -*- Python -*- # Configuration file for the 'lit' test runner. 
diff --git a/lgc/test/lit.site.cfg.py.in b/lgc/test/lit.site.cfg.py.in index 8be4ba2251..489151d082 100644 --- a/lgc/test/lit.site.cfg.py.in +++ b/lgc/test/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys @@ -14,6 +39,18 @@ for d in "@LIT_DEFINITIONS@".split(";"): val = def_split[1] if len(def_split) > 1 else "ON" config.available_features.add(name) +breaking_revisions = [511860] + +llvm_main_revision = "@LLVM_MAIN_REVISION@" +try: + for rev in breaking_revisions: + # If there's no llvm_main_revision, we'll just assume we're using the + # latest LLVM version. + if (not llvm_main_revision) or int(llvm_main_revision) >= rev: + config.available_features.add('llvm-main-revision-ge-%d' % rev) +except ValueError: + lit_config.fatal("unable to parse LLVM_MAIN_REVISION") + # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. try: diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc index 7ec3bea739..fd3309c9d1 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc index 9b1a7c31cd..e1078a582c 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc index d52d45edb8..689fe4acc7 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc index 5fbe6ff84c..eeb6ae0a5e 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc index 1ad1e8cf4c..52551afce8 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc index 1ed7731c3a..bb6e74dc38 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc index 2844ab68a2..e2f9c19a22 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc index aee46c2951..190a0ebd75 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc index 0b6c0d5688..46f9761009 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc index 26d264e908..e20ae424cb 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc index 580581b0cd..274501c3cb 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc index 80aea0036d..e313772bbf 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc index d250aee16f..be0a733374 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s ; ModuleID = 'lgcPipeline' diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc index d0239817ac..156d4c1c33 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc index fd6abb50e6..9f8e85b907 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc index 1b616ebf60..1877b49a23 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 4 ; RUN: lgc -mcpu=gfx1010 -print-after=lgc-builder-replayer -o - %s 2>&1 | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/test/shaderdb/gfx1150_ds_subdword_workaround.lgc b/lgc/test/shaderdb/gfx1150_ds_subdword_workaround.lgc index fc77a85fdb..feb99066ef 100644 --- a/lgc/test/shaderdb/gfx1150_ds_subdword_workaround.lgc +++ b/lgc/test/shaderdb/gfx1150_ds_subdword_workaround.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc ; RUN: lgc -o - --mcpu=gfx1100 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s ; RUN: lgc -o - --mcpu=gfx1150 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s diff --git a/lgc/test/tanh.lgc b/lgc/test/tanh.lgc index b21cdaf689..bbc079f6ef 100644 --- a/lgc/test/tanh.lgc +++ b/lgc/test/tanh.lgc @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --tool lgc --march amdgcn --version 4 ; RUN: lgc -mcpu=gfx1100 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK %s diff --git a/lgc/tool/lgc/CMakeLists.txt b/lgc/tool/lgc/CMakeLists.txt index 2f8a01aa17..f97b70038e 100644 --- a/lgc/tool/lgc/CMakeLists.txt +++ b/lgc/tool/lgc/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -56,4 +56,4 @@ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include ) -set_compiler_options(lgc ${LLPC_ENABLE_WERROR}) +set_compiler_options(lgc) diff --git a/lgc/tool/lgc/lgc.cpp b/lgc/tool/lgc/lgc.cpp index 9d4a8a27fd..295ff885c3 100644 --- a/lgc/tool/lgc/lgc.cpp +++ b/lgc/tool/lgc/lgc.cpp @@ -34,6 +34,7 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" #include "lgc/LgcIlCpsDialect.h" +#include "lgc/LgcXdlDialect.h" #include "lgc/PassManager.h" #include "lgc/Pipeline.h" #include "lgc/lowering/LgcLowering.h" @@ -179,8 +180,8 @@ int main(int argc, char **argv) { LgcContext::initialize(); LLVMContext context; - auto dialectContext = - llvm_dialects::DialectContext::make(context); + auto dialectContext = llvm_dialects::DialectContext::make(context); // Set our category on options that we want to show in -help, and hide other options. auto opts = cl::getRegisteredOptions(); diff --git a/lgc/tool/lgcdis/CMakeLists.txt b/lgc/tool/lgcdis/CMakeLists.txt index dde114ebce..5a695f7300 100644 --- a/lgc/tool/lgcdis/CMakeLists.txt +++ b/lgc/tool/lgcdis/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -43,4 +43,4 @@ PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../interface ) -set_compiler_options(lgcdis ${LLPC_ENABLE_WERROR}) +set_compiler_options(lgcdis) diff --git a/lgc/tool/lgcdis/lgcdis.cpp b/lgc/tool/lgcdis/lgcdis.cpp index 1303fb0ef6..2af5e60066 100644 --- a/lgc/tool/lgcdis/lgcdis.cpp +++ b/lgc/tool/lgcdis/lgcdis.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2021-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -87,7 +87,9 @@ int main(int argc, char **argv) { errs() << "\n"; return 1; } - disassembleObject((*fileOrErr)->getMemBufferRef(), ostream); + Error err = disassembleObject((*fileOrErr)->getMemBufferRef(), ostream); + if (err) + report_fatal_error(Twine((*fileOrErr)->getBufferIdentifier()) + ": " + toString(std::move(err))); } return 0; diff --git a/lgc/unittests/lit.site.cfg.py.in b/lgc/unittests/lit.site.cfg.py.in index a5223ba13f..42260e3e96 100644 --- a/lgc/unittests/lit.site.cfg.py.in +++ b/lgc/unittests/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys diff --git a/lgc/util/AddressExtender.cpp b/lgc/util/AddressExtender.cpp index 450fee8b15..156aa235be 100644 --- a/lgc/util/AddressExtender.cpp +++ b/lgc/util/AddressExtender.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -43,7 +43,7 @@ using namespace llvm; Instruction *AddressExtender::getFirstInsertionPt() { if (m_pc) return m_pc->getNextNode(); - return &*m_func->front().getFirstNonPHIOrDbgOrAlloca(); + return &*m_insertInto->getFirstNonPHIOrDbgOrAlloca(); } // ===================================================================================================================== @@ -93,10 +93,10 @@ Instruction *AddressExtender::extendWithPc(Value *addr32, Type *ptrTy, IRBuilder // code at the start of the function. Instruction *AddressExtender::getPc() { if (!m_pc) { - // This uses its own builder, as it wants to insert at the start of the function, whatever the caller + // This uses its own builder, as it wants to insert at the (original) start of the function, whatever the caller // is doing. IRBuilder<> builder(m_func->getContext()); - builder.SetInsertPointPastAllocas(m_func); + builder.SetInsertPoint(m_insertInto->getFirstNonPHIOrDbgOrAlloca()); Value *pc = builder.CreateIntrinsic(llvm::Intrinsic::amdgcn_s_getpc, {}, {}); pc = cast(builder.CreateBitCast(pc, FixedVectorType::get(builder.getInt32Ty(), 2))); m_pc = cast(pc); diff --git a/lgc/util/BufferResource.cpp b/lgc/util/BufferResource.cpp new file mode 100644 index 0000000000..39aa31852b --- /dev/null +++ b/lgc/util/BufferResource.cpp @@ -0,0 +1,73 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file BufferResource.cpp + * @brief LLPC source file: contains implementation of LLPC internal-use utility functions. 
+ *********************************************************************************************************************** + */ +#include "lgc/util/BufferResource.h" + +using namespace llvm; +using namespace lgc; + +namespace lgc { +// ===================================================================================================================== +// Get 64/32 bit NumRecords from the buffer descriptor resource +// +// @gfxIpVer : GfxIp version +// @builder : Passed in Builder +// @bufferDesc : Buffer descriptor resource +Value *getBufferNumRecords(const GfxIpVersion &gfxIpVer, BuilderCommon &builder, Value *const bufferDesc) { + Value *numRecords = nullptr; + if (gfxIpVer.major <= 12) { + // Extract element 2 which is the NUM_RECORDS field from the buffer descriptor. + numRecords = builder.CreateExtractElement(bufferDesc, 2); + } else { + llvm_unreachable("Unsupported gfxip!"); + } + return numRecords; +} + +// ===================================================================================================================== +// Get 32bit Stride from the buffer descriptor resource +// +// @gfxIpVer : GfxIp version +// @builder : Passed in Builder +// @bufferDesc : Buffer descriptor resource +Value *getBufferStride(const GfxIpVersion &gfxIpVer, BuilderCommon &builder, Value *const bufferDesc) { + Value *stride = nullptr; + if (gfxIpVer.major <= 12) { + // stride[61:48] + Value *desc1 = builder.CreateExtractElement(bufferDesc, 1); + stride = builder.CreateAnd(builder.CreateLShr(desc1, builder.getInt32(16)), builder.getInt32(0x3fff)); + } else { + llvm_unreachable("Unsupported gfxip!"); + } + // TODO:stride is possibly required to updated with stride_scale, stride_scale value:0 = 1x, 1 = 4x, 2 = 8x, 3 = 32x + return stride; +} + +} // namespace lgc diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 0011291d86..1f00aaacfc 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -1,7 +1,7 @@ ## 
####################################################################################################################### # - # Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -28,14 +28,9 @@ project(LLPC C CXX) ### Create LLPC Library ################################################################################################ add_library(llpcinternal STATIC "") -if(ICD_BUILD_LLPC) - add_dependencies(llpcinternal LLVMlgc) -endif() - ### Cached Project Options ############################################################################################# option(LLPC_BUILD_TESTS "LLPC build all tests" OFF) option(LLPC_BUILD_LLVM_TOOLS "Build LLVM tools" OFF) -option(LLPC_ENABLE_WERROR "Build LLPC with more errors" OFF) option(LLPC_DISABLE_SPVGEN "Build LLPC tools without SPVGEN" OFF) if(ICD_BUILD_LLPC) @@ -54,21 +49,17 @@ if(ICD_BUILD_LLPC) endforeach() endif() target_link_libraries(llpcinternal PUBLIC ${extra_llvm_libs}) - target_link_libraries(llpcinternal PRIVATE gfxruntime) + target_link_libraries(llpcinternal PRIVATE LLVMlgc gfxruntime sharedme_xdl) endif() ### Compiler Options ################################################################################################### include(../cmake/CompilerFlags.cmake) -set_compiler_options(llpcinternal ${LLPC_ENABLE_WERROR}) +set_compiler_options(llpcinternal) ### TableGen for LLPC dialect ######################################################################################## if (ICD_BUILD_LLPC) - if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) - set(LLPC_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) - else() - set(LLPC_TABLEGEN_EXE $) - endif() - set(LLPC_TABLEGEN_TARGET llvm-dialects-tblgen) + 
include(../cmake/DialectsTablegen.cmake) + set_dialects_tablegen_exe(LLPC) set(LLVM_TARGET_DEFINITIONS include/LlpcDialect.td) file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include") @@ -125,8 +116,8 @@ target_include_directories(llpcinternal util ../util ${PROJECT_BINARY_DIR}/include - ${XGL_PAL_PATH}/inc/core - ${XGL_PAL_PATH}/inc/util + ${PAL_SOURCE_DIR}/inc/core + ${PAL_SOURCE_DIR}/inc/util ${LLVM_INCLUDE_DIRS} ) @@ -317,7 +308,7 @@ target_include_directories(llpc include ) -set_compiler_options(llpc ${LLPC_ENABLE_WERROR}) +set_compiler_options(llpc) # This one source file is here just to stop getting cmake and ar errors about having no source files. target_sources(llpc PRIVATE @@ -393,9 +384,9 @@ target_include_directories(llpc_standalone_compiler PUBLIC ${PROJECT_SOURCE_DIR}/translator/lib/SPIRV ${PROJECT_SOURCE_DIR}/translator/lib/SPIRV/libSPIRV ${PROJECT_SOURCE_DIR}/util - ${XGL_PAL_PATH}/src/core/hw/gfxip/gfx9/chip - ${XGL_PAL_PATH}/inc/core - ${XGL_PAL_PATH}/inc/util + ${PAL_SOURCE_DIR}/src/core/hw/gfxip/gfx9/chip + ${PAL_SOURCE_DIR}/inc/core + ${PAL_SOURCE_DIR}/inc/util ${LLVM_INCLUDE_DIRS} ) @@ -420,13 +411,13 @@ if (NOT LLVM_LINK_LLVM_DYLIB) target_link_libraries(llpc_standalone_compiler PUBLIC ${llvm_libs}) endif() -set_compiler_options(llpc_standalone_compiler ${LLPC_ENABLE_WERROR}) +set_compiler_options(llpc_standalone_compiler) # Add an executable for the amdllpc standalone compiler. 
add_executable(amdllpc tool/amdllpc.cpp) add_dependencies(amdllpc llpc_standalone_compiler) target_link_libraries(amdllpc PRIVATE llpc_standalone_compiler) -set_compiler_options(amdllpc ${LLPC_ENABLE_WERROR}) +set_compiler_options(amdllpc) add_compile_definitions(amdllpc PRIVATE SH_EXPORTING) endif() diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index f1783d6a98..427789accd 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -1358,6 +1358,9 @@ Result Compiler::buildGraphicsShaderStage(const GraphicsPipelineBuildInfo *pipel pipelineOut->pipelineBin.codeSize = candidateElf.size(); pipelineOut->pipelineBin.pCode = code; + BinaryData binElf = {candidateElf.size(), candidateElf.data()}; + PipelineDumper::DumpPm4Crc(reinterpret_cast(pipelineDumpFile), m_gfxIp, &binElf); + if (metaDataSize > 0) { pipelineOut->fsOutputMetaData = code + candidateElf.size(); pipelineOut->fsOutputMetaDataSize = metaDataSize; @@ -1496,6 +1499,8 @@ Result Compiler::BuildColorExportShader(const GraphicsPipelineBuildInfo *pipelin pipelineOut->pipelineBin.codeSize = elf.codeSize; pipelineOut->pipelineBin.pCode = code; + PipelineDumper::DumpPm4Crc(reinterpret_cast(pipelineDumpFile), m_gfxIp, &elf); + return Result::Success; } @@ -2576,6 +2581,9 @@ Result Compiler::BuildGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline LLPC_OUTS("Adding graphics pipeline to the cache.\n"); cacheAccessor->setElfInCache(elfBin); } + + PipelineDumper::DumpPm4Crc(reinterpret_cast(pipelineDumpFile), m_gfxIp, &elfBin); + return result; } @@ -2728,6 +2736,8 @@ Result Compiler::BuildComputePipeline(const ComputePipelineBuildInfo *pipelineIn cacheAccessor->setElfInCache(elfBin); } + PipelineDumper::DumpPm4Crc(reinterpret_cast(pipelineDumpFile), m_gfxIp, &elfBin); + return Result::Success; } diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index e7a423485e..7f901d20ee 100644 --- a/llpc/context/llpcContext.cpp +++ 
b/llpc/context/llpcContext.cpp @@ -56,6 +56,7 @@ #include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "lgc/LgcRtqDialect.h" +#include "lgc/LgcXdlDialect.h" #include "lgc/PassManager.h" #include "lgc/RuntimeContext.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -73,6 +74,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" @@ -88,6 +90,7 @@ using namespace lgc::rtq; using namespace llvm; using namespace lgc::cps; using namespace lgc::ilcps; +using namespace lgc::xdl; namespace Llpc { @@ -95,9 +98,9 @@ namespace Llpc { // // @param gfxIp : Graphics IP version info Context::Context(GfxIpVersion gfxIp) : LLVMContext(), m_gfxIp(gfxIp) { - m_dialectContext = - llvm_dialects::DialectContext::make(*this); + m_dialectContext = llvm_dialects::DialectContext::make(*this); reset(); } diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index b5788356a6..991848d9eb 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -33,6 +33,7 @@ #include "llpcCompiler.h" #include "llpcDebug.h" #include "llpcUtil.h" +#include "vkgcBase.h" #include "vkgcGpurtShim.h" #include "vkgcPipelineDumper.h" #include "lgc/Builder.h" @@ -322,6 +323,8 @@ Options PipelineContext::computePipelineOptions() const { options.rtTriCompressMode = m_rtState.triCompressMode; options.disablePerCompFetch = getPipelineOptions()->disablePerCompFetch; + options.padBufferSizeToNextDword = getPipelineOptions()->padBufferSizeToNextDword; + return options; } @@ -639,7 +642,7 @@ void PipelineContext::convertResourceNode(ResourceNode &dst, const ResourceMappi auto &immutableNode = *it->second; if (immutableNode.arraySize != 0) { - if (src.type == 
ResourceMappingNodeType::DescriptorYCbCrSampler) { + if (src.type == ResourceMappingNodeType::DescriptorYCbCrSampler && src.srdRange.strideInDwords == 0) { // TODO: Remove the statement when dst.stride is per array size // Update dst.stride = node.sizeInDwords / immutableNode.arraySize dst.stride /= immutableNode.arraySize; @@ -803,6 +806,15 @@ ShaderOptions PipelineContext::computeShaderOptions(const PipelineShaderInfo &sh shaderOptions.viewIndexFromDeviceIndex = shaderInfo.options.viewIndexFromDeviceIndex; shaderOptions.forceUnderflowPrevention = shaderInfo.options.forceUnderflowPrevention; + + static_assert(static_cast(Vkgc::LlvmScheduleStrategy::MaxIlp) == + lgc::LlvmScheduleStrategy::MaxIlp, + "Mismatch"); + static_assert(static_cast(Vkgc::LlvmScheduleStrategy::MaxMemoryClause) == + lgc::LlvmScheduleStrategy::MaxMemoryClause, + "Mismatch"); + shaderOptions.scheduleStrategy = static_cast(shaderInfo.options.scheduleStrategy); + return shaderOptions; } diff --git a/llpc/lowering/LinkTransformShaders.cpp b/llpc/lowering/LinkTransformShaders.cpp index 2ee63a80fb..f20460f285 100644 --- a/llpc/lowering/LinkTransformShaders.cpp +++ b/llpc/lowering/LinkTransformShaders.cpp @@ -122,7 +122,7 @@ void LinkTransformShaders::processLibraryFunction(Function *&func, Function *tra args.push_back(v); } - CompilerUtils::CrossModuleInliner inliner; + compilerutils::CrossModuleInliner inliner; auto *vsOutput = inliner.inlineCall(*m_builder, transformVsFunc, {args}).returnValue; if (primCulling) { diff --git a/llpc/lowering/LowerAdvancedBlend.cpp b/llpc/lowering/LowerAdvancedBlend.cpp index 8a01ed59be..f932435f5f 100644 --- a/llpc/lowering/LowerAdvancedBlend.cpp +++ b/llpc/lowering/LowerAdvancedBlend.cpp @@ -134,7 +134,7 @@ void LowerAdvancedBlend::processFsOutputs(Module &module) { auto *advancedBlendFunc = (*gfxRuntimeContext.theModule).getFunction(m_enableRov ? 
AdvancedBlendInternalRov : AdvancedBlendInternal); - CompilerUtils::CrossModuleInliner inliner; + compilerutils::CrossModuleInliner inliner; // Call AmdAdvancedBlendInternal() for each output for (auto [i, outCol] : llvm::enumerate(outputs)) { diff --git a/llpc/lowering/LowerCooperativeMatrix.cpp b/llpc/lowering/LowerCooperativeMatrix.cpp index 3e4c9f0fd4..67d9ec51e0 100644 --- a/llpc/lowering/LowerCooperativeMatrix.cpp +++ b/llpc/lowering/LowerCooperativeMatrix.cpp @@ -34,8 +34,10 @@ #include "LowerCooperativeMatrix.h" #include "llpcDialect.h" +#include "xdl/util/ElementType.h" #include "lgc/BuilderCommon.h" #include "lgc/LgcDialect.h" +#include "lgc/LgcXdlDialect.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -43,6 +45,7 @@ using namespace llvm; using namespace lgc; +using namespace lgc::xdl; using namespace Llpc; namespace { @@ -98,7 +101,7 @@ PreservedAnalyses LowerCooperativeMatrix::run() { void LowerCooperativeMatrix::visitProxy(CallInst &call) { Value *ptr = call.getArgOperand(0); auto elemTypeEnum = (CooperativeMatrixElementType)(cast(call.getArgOperand(1))->getZExtValue()); - Type *elemType = m_builder.transCooperativeMatrixElementType(elemTypeEnum); + Type *elemType = transCooperativeMatrixElementType(m_builder, elemTypeEnum); auto layout = (CooperativeMatrixLayout)(cast(call.getArgOperand(2))->getZExtValue()); m_toDelete.push_back(&call); @@ -126,16 +129,16 @@ void LowerCooperativeMatrix::visitPointerUsers(Value *ptr, CooperativeMatrixElem assert(load->getPointerOperand() == ptr); assert(load->getType() == elemType); - Type *matrixType = m_builder.getCooperativeMatrixTy(elemTypeEnum, layout); + Type *matrixType = getCooperativeMatrixTy(m_builder, elemTypeEnum, layout); Value *matrix = m_builder.CreateLoad(matrixType, matrixPtr); - Type *elemTy = m_builder.transCooperativeMatrixElementType(elemTypeEnum); + Type *elemTy = transCooperativeMatrixElementType(m_builder, elemTypeEnum); Value *element = m_builder.create(elemTy, matrix, 
index, elemTypeEnum, layout); load->replaceAllUsesWith(element); } else if (auto *store = dyn_cast(inst)) { assert(store->getPointerOperand() == ptr); assert(store->getValueOperand()->getType() == elemType); - Type *matrixType = m_builder.getCooperativeMatrixTy(elemTypeEnum, layout); + Type *matrixType = getCooperativeMatrixTy(m_builder, elemTypeEnum, layout); Value *matrix = m_builder.CreateLoad(matrixType, matrixPtr); matrix = m_builder.create(matrix->getType(), matrix, store->getValueOperand(), index, elemTypeEnum, layout); diff --git a/llpc/lowering/LowerGlCompatibility.cpp b/llpc/lowering/LowerGlCompatibility.cpp index 88942528a6..42a10acac9 100644 --- a/llpc/lowering/LowerGlCompatibility.cpp +++ b/llpc/lowering/LowerGlCompatibility.cpp @@ -48,10 +48,10 @@ namespace Llpc { // ===================================================================================================================== LowerGlCompatibility::LowerGlCompatibility() - : m_retInst(nullptr), m_entryPointEnd(nullptr), m_originalEntryBlock(nullptr), m_clipVertex(nullptr), - m_clipDistance(nullptr), m_clipPlane(nullptr), m_frontColor(nullptr), m_backColor(nullptr), - m_frontSecondaryColor(nullptr), m_backSecondaryColor(nullptr), m_color(nullptr), m_secondaryColor(nullptr), - m_frontFacing(nullptr), m_patchTexCoord(nullptr), m_fragColor(nullptr), m_fragDepth(), m_fragStencilRef() { + : m_retInst(nullptr), m_clipVertex(nullptr), m_clipDistance(nullptr), m_clipPlane(nullptr), m_frontColor(nullptr), + m_backColor(nullptr), m_frontSecondaryColor(nullptr), m_backSecondaryColor(nullptr), m_color(nullptr), + m_secondaryColor(nullptr), m_frontFacing(nullptr), m_patchTexCoord(nullptr), m_fragColor(nullptr), m_fragDepth(), + m_fragStencilRef() { } // ===================================================================================================================== @@ -319,18 +319,6 @@ void LowerGlCompatibility::buildPatchPositionInfo() { collectEmitInst(); else unifyFunctionReturn(m_entryPoint); - - // 
Create early kill block for bitmap, bitmap require a early return in masked thread. - if (needEmulateBitmap()) { - m_originalEntryBlock = &(m_entryPoint->getEntryBlock()); - m_originalEntryBlock->splitBasicBlockBefore(m_originalEntryBlock->getFirstInsertionPt(), ".gl.compatibility.entry"); - m_entryPointEnd = m_originalEntryBlock->splitBasicBlockBefore(m_originalEntryBlock->getFirstInsertionPt(), - ".gl.compatibility.kill"); - m_builder->SetInsertPoint(m_entryPointEnd->begin()); - m_builder->CreateKill(); - ReturnInst::Create(*m_context, m_entryPointEnd); - m_entryPointEnd->back().eraseFromParent(); - } } // ===================================================================================================================== @@ -502,7 +490,7 @@ void LowerGlCompatibility::createClipPlane() { auto vec4Type = FixedVectorType::get(floatType, 4); auto clipPlaneType = ArrayType::get(vec4Type, 8); auto clipPlane = - new GlobalVariable(*m_module, clipPlaneType, false, GlobalValue::ExternalLinkage, nullptr, "gl_ClipPlaneInternal", + new GlobalVariable(*m_module, clipPlaneType, true, GlobalValue::ExternalLinkage, nullptr, "gl_ClipPlaneInternal", nullptr, GlobalVariable::NotThreadLocal, SPIRV::SPIRAS_Uniform); auto locationFound = getUniformConstantEntryByLocation(m_context, m_shaderStage, Vkgc::GlCompatibilityUniformLocation::ClipPlane); @@ -748,32 +736,34 @@ void LowerGlCompatibility::emulateTwoSideLighting() { void LowerGlCompatibility::emulateBitmap() { auto *buildInfo = static_cast(m_context->getPipelineBuildInfo()); m_builder->SetInsertPoint(m_entryPoint->getEntryBlock().begin()); - auto floatType = m_builder->getFloatTy(); - auto int32Type = m_builder->getInt32Ty(); - auto vec2Type = FixedVectorType::get(floatType, 2); - auto ivec2Type = FixedVectorType::get(int32Type, 2); + Value *constInt0x7 = ConstantInt::get(m_builder->getInt32Ty(), 0x7); + Value *constInt0x3 = ConstantInt::get(m_builder->getInt32Ty(), 0x3); if (!m_patchTexCoord) { createPatchTexCoord(); } - Value 
*constInt0x7 = ConstantInt::get(ivec2Type, 0x7); - Value *constInt0x3 = ConstantInt::get(ivec2Type, 0x3); - Value *patchTexcoord = m_builder->CreateLoad(vec2Type, m_patchTexCoord); - Value *texcoord = m_builder->CreateFPToUI(patchTexcoord, ivec2Type); - Value *mask = m_builder->CreateAnd(texcoord, constInt0x7); - if (buildInfo->glState.enableBitmapLsb) { - mask = m_builder->CreateSub(mask, constInt0x7); + Value *patchTexcoord = m_builder->CreateLoad(FixedVectorType::get(m_builder->getFloatTy(), 2), m_patchTexCoord); + Value *texcoordBits = m_builder->CreateFPToUI(patchTexcoord, FixedVectorType::get(m_builder->getInt32Ty(), 2)); + Value *texcoordX = m_builder->CreateExtractElement(texcoordBits, uint64_t(0)); + Value *bitMaskResult = m_builder->CreateAnd(texcoordX, constInt0x7); + if (!buildInfo->glState.enableBitmapLsb) { + bitMaskResult = m_builder->CreateSub(constInt0x7, bitMaskResult); } - mask = m_builder->CreateShl(ConstantInt::get(ivec2Type, 1), mask); - Value *texCoordSrc = m_builder->CreateLShr(constInt0x3, texcoord); + + bitMaskResult = m_builder->CreateShl(ConstantInt::get(m_builder->getInt32Ty(), 1), bitMaskResult); + texcoordX = m_builder->CreateLShr(texcoordX, constInt0x3); + texcoordBits = m_builder->CreateInsertElement(texcoordBits, texcoordX, uint64_t(0)); + auto imageDescPtr = m_builder->CreateGetDescPtr( lgc::ResourceNodeType::DescriptorResource, lgc::ResourceNodeType::DescriptorResource, PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource), Vkgc::InternalBinding::PixelOpInternalBinding); - Value *texel = m_builder->CreateImageLoad(ivec2Type, Dim2D, 0, imageDescPtr, texCoordSrc, nullptr); - Value *val = m_builder->CreateAnd(mask, texel); - val = m_builder->CreateExtractElement(val, ConstantInt::get(int32Type, 0)); - auto cmp = m_builder->CreateICmpEQ(val, ConstantInt::get(int32Type, 0)); - m_builder->CreateCondBr(cmp, m_entryPointEnd, m_originalEntryBlock); + Value *cmpResult = 
m_builder->CreateImageLoad(FixedVectorType::get(m_builder->getInt32Ty(), 2), Dim2D, 0, + imageDescPtr, texcoordBits, nullptr); + cmpResult = m_builder->CreateExtractElement(cmpResult, uint64_t(0)); + cmpResult = m_builder->CreateAnd(bitMaskResult, cmpResult); + cmpResult = m_builder->CreateICmpEQ(cmpResult, ConstantInt::get(m_builder->getInt32Ty(), 0)); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(cmpResult, m_builder->GetInsertPoint(), false)); + m_builder->CreateKill(); } // ===================================================================================================================== @@ -783,8 +773,9 @@ void LowerGlCompatibility::emulateBitmap() { // @param [in] valTy : current input value's type, should be global's valueType in top-level. // @param [in] metaVal : metadata value of current output variable. // @param [in] alphaScaleVal : calculated alpha scaling results, default value is one. -void LowerGlCompatibility::patchAlphaScaling(Value *val, Type *valTy, Constant *metaVal, Value *alphaScaleVal) { +Value *LowerGlCompatibility::patchAlphaScaling(Value *val, Type *valTy, Constant *metaVal, Value *alphaScaleVal) { ShaderInOutMetadata outputMeta = {}; + Value *returnVal = nullptr; if (valTy->isArrayTy()) { outputMeta.U64All[0] = cast(metaVal->getOperand(2))->getZExtValue(); @@ -792,18 +783,22 @@ void LowerGlCompatibility::patchAlphaScaling(Value *val, Type *valTy, Constant * if (!outputMeta.IsBuiltIn) { auto elemMeta = cast(metaVal->getOperand(1)); - const uint64_t elemCount = val->getType()->getArrayNumElements(); + const uint64_t elemCount = valTy->getArrayNumElements(); for (unsigned idx = 0; idx < elemCount; ++idx) { Value *elem = m_builder->CreateExtractValue(val, {idx}, ""); - patchAlphaScaling(elem, elem->getType(), elemMeta, alphaScaleVal); + returnVal = patchAlphaScaling(elem, elem->getType(), elemMeta, alphaScaleVal); + if (returnVal != nullptr) + returnVal = m_builder->CreateInsertValue(val, returnVal, {idx}, ""); } } } else if 
(valTy->isStructTy()) { - const uint64_t memberCount = val->getType()->getStructNumElements(); + const uint64_t memberCount = valTy->getStructNumElements(); for (unsigned memberIdx = 0; memberIdx < memberCount; ++memberIdx) { auto memberMeta = cast(metaVal->getOperand(memberIdx)); Value *member = m_builder->CreateExtractValue(val, {memberIdx}); - patchAlphaScaling(member, member->getType(), memberMeta, alphaScaleVal); + returnVal = patchAlphaScaling(member, member->getType(), memberMeta, alphaScaleVal); + if (returnVal != nullptr) + returnVal = m_builder->CreateInsertValue(val, returnVal, {memberIdx}, ""); } } else { Constant *inOutMetaConst = cast(metaVal); @@ -812,14 +807,14 @@ void LowerGlCompatibility::patchAlphaScaling(Value *val, Type *valTy, Constant * // When enabling line smooth, alpha channel will be patched with a scaling factor. if (!outputMeta.IsBuiltIn && outputMeta.NumComponents == 4 && alphaScaleVal) { - Value *outputValue = m_builder->CreateLoad(valTy, val); - Value *scaledAlpha = m_builder->CreateExtractElement(outputValue, 3); + Value *scaledAlpha = m_builder->CreateExtractElement(val, 3); Value *alphaScaleFactor = m_builder->CreateLoad(m_builder->getFloatTy(), alphaScaleVal); scaledAlpha = m_builder->CreateFMul(alphaScaleFactor, scaledAlpha); - outputValue = m_builder->CreateInsertElement(outputValue, scaledAlpha, m_builder->getInt32(3)); - m_builder->CreateStore(outputValue, val); + returnVal = m_builder->CreateInsertElement(val, scaledAlpha, m_builder->getInt32(3)); } } + + return returnVal; } // ===================================================================================================================== @@ -874,9 +869,7 @@ void LowerGlCompatibility::emulateSmoothStipple() { Value *xInByteOffset = m_builder->CreateAnd(calcFragCoordX, m_builder->getInt32(0x7u)); // xInByteOffset = 7 - xInByteOffset // Due to concern with default turned on option LsbFirst, x bits are in reverse order within each 8 bits pattern. 
- if (pipelineBuildInfo->glState.enableBitmapLsb) { - xInByteOffset = m_builder->CreateSub(m_builder->getInt32(0x7u), xInByteOffset); - } + xInByteOffset = m_builder->CreateSub(m_builder->getInt32(0x7u), xInByteOffset); // xOffset = xInByteOffset + xOffset xOffset = m_builder->CreateAdd(xOffset, xInByteOffset); @@ -934,7 +927,10 @@ void LowerGlCompatibility::emulateSmoothStipple() { auto addrSpace = global.getType()->getAddressSpace(); if (addrSpace == SPIRAS_Output) { auto outputMetaVal = mdconst::extract(global.getMetadata(gSPIRVMD::InOut)->getOperand(0)); - patchAlphaScaling(&global, global.getValueType(), outputMetaVal, alphaScaleVal); + Value *patchedVal = m_builder->CreateLoad(global.getValueType(), &global); + patchedVal = patchAlphaScaling(patchedVal, global.getValueType(), outputMetaVal, alphaScaleVal); + if (patchedVal != nullptr) + m_builder->CreateStore(patchedVal, &global); } } } @@ -985,7 +981,10 @@ void LowerGlCompatibility::emulateSmoothStipple() { auto addrSpace = global.getType()->getAddressSpace(); if (addrSpace == SPIRAS_Output) { auto outputMetaVal = mdconst::extract(global.getMetadata(gSPIRVMD::InOut)->getOperand(0)); - patchAlphaScaling(&global, global.getValueType(), outputMetaVal, alphaScaleVal); + Value *patchedVal = m_builder->CreateLoad(global.getValueType(), &global); + patchedVal = patchAlphaScaling(patchedVal, global.getValueType(), outputMetaVal, alphaScaleVal); + if (patchedVal != nullptr) + m_builder->CreateStore(patchedVal, &global); } } } @@ -1146,7 +1145,7 @@ void LowerGlCompatibility::lowerAlphaTest() { Value *outputAlpha = m_builder->CreateExtractElement(outputValue, 3); // get alphaRef - auto alphaRef = new GlobalVariable(*m_module, floatTy, false, GlobalValue::ExternalLinkage, nullptr, "alphaTestRef", + auto alphaRef = new GlobalVariable(*m_module, floatTy, true, GlobalValue::ExternalLinkage, nullptr, "alphaTestRef", nullptr, GlobalVariable::NotThreadLocal, SPIRV::SPIRAS_Uniform); auto locationFound = 
getUniformConstantEntryByLocation(m_context, m_shaderStage, Vkgc::GlCompatibilityUniformLocation::AlphaTestRef); diff --git a/llpc/lowering/LowerGlCompatibility.h b/llpc/lowering/LowerGlCompatibility.h index d03debe6d6..423cfbb61d 100644 --- a/llpc/lowering/LowerGlCompatibility.h +++ b/llpc/lowering/LowerGlCompatibility.h @@ -84,7 +84,6 @@ class LowerGlCompatibility : public SpirvLower, public llvm::PassInfoMixin m_emitCalls; // "Call" instructions to emit vertex (geometry shader). llvm::ReturnInst *m_retInst; // "Return" of the entry point. - llvm::BasicBlock *m_entryPointEnd; // The end block of the entry point, use for early return. - llvm::BasicBlock *m_originalEntryBlock; // The original entry block of entry point. // The resource use to lower gl_ClipVertex llvm::User *m_clipVertex; // The global variable of gl_ClipVertex diff --git a/llpc/lowering/LowerGlobals.cpp b/llpc/lowering/LowerGlobals.cpp index 438f9c4881..bf2894f2d7 100644 --- a/llpc/lowering/LowerGlobals.cpp +++ b/llpc/lowering/LowerGlobals.cpp @@ -233,7 +233,7 @@ PreservedAnalyses LowerGlobals::run(Module &module, ModuleAnalysisManager &analy // Collect "emit" calls handleCallInst(true, false); } else if (m_shaderStage < ShaderStageGfxCount) { - m_unifiedReturn = CompilerUtils::unifyReturns(*m_entryPoint, *m_builder); + m_unifiedReturn = compilerutils::unifyReturns(*m_entryPoint, *m_builder); } // Preparations for XFB handling @@ -571,7 +571,7 @@ void LowerGlobals::lowerInOut(llvm::GlobalVariable *globalVar) { } SmallVector toErase; - CompilerUtils::replaceAllPointerUses(globalVar, proxy, toErase); + compilerutils::replaceAllPointerUses(globalVar, proxy, toErase); for (auto inst : toErase) inst->eraseFromParent(); } else { @@ -2080,6 +2080,9 @@ void LowerGlobals::lowerUniformConstants() { Value *bufferDesc = m_builder->create( uniformConstantsSet, uniformConstantsBinding, m_builder->getInt32(0), lgc::Builder::BufferFlagNonConst); Value *newPtr = 
m_builder->CreateConstInBoundsGEP1_32(m_builder->getInt8Ty(), bufferDesc, uniformConstantsOffset); + // Default uniform variables are always initialized by driver, and shader never writes to it. So it is invariant + // during shader execution. + m_builder->CreateInvariantStart(newPtr); for (auto *inst : eachFunc.second) inst->replaceUsesOfWith(&global, newPtr); } diff --git a/llpc/lowering/LowerMath.cpp b/llpc/lowering/LowerMath.cpp index 1707c4f9af..5b97387067 100644 --- a/llpc/lowering/LowerMath.cpp +++ b/llpc/lowering/LowerMath.cpp @@ -266,7 +266,6 @@ Function *LowerMathConstFolding::getEntryPoint() { bool LowerMathPrecision::adjustExports(Module &module, bool disablePositionOpt) { bool changed = false; - ShaderStage preFragmentStage = getLastVertexProcessingStage(); for (auto &func : module.functions()) { // Disable fast math for gl_Position. // TODO: This requires knowledge of the Builder implementation, which is not ideal. @@ -292,7 +291,7 @@ bool LowerMathPrecision::adjustExports(Module &module, bool disablePositionOpt) valueWritten = callInst->getOperand(0); } - if (valueWritten && builtIn == lgc::BuiltInPosition && m_shaderStage == preFragmentStage) { + if (valueWritten && builtIn == lgc::BuiltInPosition) { disableFastMath(valueWritten, disablePositionOpt); changed = true; } @@ -301,17 +300,6 @@ bool LowerMathPrecision::adjustExports(Module &module, bool disablePositionOpt) return changed; } -Vkgc::ShaderStage LowerMathPrecision::getLastVertexProcessingStage() const { - auto stageMask = m_context->getShaderStageMask(); - for (auto stage : {Vkgc::ShaderStageMesh, Vkgc::ShaderStageGeometry, Vkgc::ShaderStageTessEval, - Vkgc::ShaderStageTessControl, Vkgc::ShaderStageVertex}) { - unsigned int stageBit = 1 << stage; - if (stageMask & stageBit) - return stage; - } - return Vkgc::ShaderStageInvalid; -} - static bool clearContractFlag(Instruction *inst) { if (!isa(inst)) return false; diff --git a/llpc/lowering/LowerMath.h b/llpc/lowering/LowerMath.h index 
0f86384c35..1dd74ff592 100644 --- a/llpc/lowering/LowerMath.h +++ b/llpc/lowering/LowerMath.h @@ -79,7 +79,6 @@ class LowerMathPrecision : public SpirvLower, public llvm::PassInfoMixinsetOperand(operandIndex, constIndex); getElemPtrs.push_back(getElemPtr); - getElemPtr->insertBefore(&getElemPtrInst); + getElemPtr->insertBefore(getElemPtrInst.getIterator()); } // Copy users, ExpandStoreInst/ExpandLoadInst change getElemPtrInst's user diff --git a/llpc/lowering/LowerRayTracing.cpp b/llpc/lowering/LowerRayTracing.cpp index a7c3e7d997..1e59bd2fcb 100644 --- a/llpc/lowering/LowerRayTracing.cpp +++ b/llpc/lowering/LowerRayTracing.cpp @@ -68,7 +68,7 @@ extern opt TrimDebugInfo; using namespace llvm; using namespace Llpc; using namespace lgc::rt; -using namespace CompilerUtils; +using namespace compilerutils; namespace SPIRV { extern const char *MetaNameSpirvOp; @@ -218,7 +218,7 @@ class SpirvLowerRayTracingImpl : public SpirvLower { llvm::Value *createLoadInstNodeAddr(); lgc::rt::RayTracingShaderStage mapStageToLgcRtShaderStage(ShaderStage stage); - std::optional m_crossModuleInliner; + std::optional m_crossModuleInliner; unsigned m_spirvOpMetaKindId; // Metadata kind ID for "spirv.op" llvm::Value *m_traceParams[TraceParam::Count]; // Trace ray set parameters diff --git a/llpc/lowering/Lowering.cpp b/llpc/lowering/Lowering.cpp index 53db846789..8c066fdf2c 100644 --- a/llpc/lowering/Lowering.cpp +++ b/llpc/lowering/Lowering.cpp @@ -43,7 +43,6 @@ #include "LowerTerminator.h" #include "LowerTranslator.h" #include "LoweringUtil.h" -#include "ProcessGpuRtLibrary.h" #include "ScalarReplacementOfBuiltins.h" #include "llpcContext.h" #include "llpcDebug.h" diff --git a/llpc/lowering/PrepareContinuations.cpp b/llpc/lowering/PrepareContinuations.cpp index edd02bcbee..c15f3b52a3 100644 --- a/llpc/lowering/PrepareContinuations.cpp +++ b/llpc/lowering/PrepareContinuations.cpp @@ -41,7 +41,7 @@ using namespace lgc; using namespace llvm; using namespace lgc::rt; -using namespace 
CompilerUtils; +using namespace compilerutils; namespace Llpc { PrepareContinuations::PrepareContinuations() { @@ -64,7 +64,11 @@ PreservedAnalyses PrepareContinuations::run(Module &module, ModuleAnalysisManage mode.workgroupSizeZ = 1; mode.noLocalInvocationIdInCalls = true; Pipeline::setComputeShaderMode(module, mode); - ContHelper::setStackAddrspace(module, ContStackAddrspace::ScratchLLPC); + auto &rtContext = *static_cast(m_context->getPipelineContext()); + ContHelper::setStackAddrspace(module, rtContext.getRayTracingPipelineBuildInfo()->cpsFlags & + Vkgc::CpsFlag::CpsFlagStackInGlobalMem + ? ContStackAddrspace::GlobalLLPC + : ContStackAddrspace::ScratchLLPC); if (module.getName().starts_with("main")) { m_shaderStage = ShaderStageRayTracingRayGen; @@ -81,12 +85,9 @@ PreservedAnalyses PrepareContinuations::run(Module &module, ModuleAnalysisManage lgc::Pipeline::markShaderEntryPoint(entryFunc, lgc::ShaderStage::Compute); } else { m_entryPoint->setName(module.getName()); - auto rtContext = static_cast(m_context->getPipelineContext()); - ContHelper::setMaxPayloadRegisterCount(module, cps::CpsPayloadMaxNumVgprs); - - setMaxHitAttributeSize(&module, rtContext->getAttributeDataSizeInBytes()); - setMaxPayloadSize(&module, rtContext->getPayloadSizeInBytes()); + setMaxHitAttributeSize(&module, rtContext.getAttributeDataSizeInBytes()); + setMaxPayloadSize(&module, rtContext.getPayloadSizeInBytes()); } return PreservedAnalyses::none(); diff --git a/llpc/lowering/PrepareTransformVertexShader.cpp b/llpc/lowering/PrepareTransformVertexShader.cpp index e148429dde..07480546c0 100644 --- a/llpc/lowering/PrepareTransformVertexShader.cpp +++ b/llpc/lowering/PrepareTransformVertexShader.cpp @@ -37,7 +37,7 @@ using namespace lgc; using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; namespace Llpc { #define DEBUG_TYPE "prepare-transform-shader" @@ -130,7 +130,7 @@ void PrepareTransformVertexShader::genFunTransformVertex(Function &function) { Value 
*vsOutput = PoisonValue::get(structTy); // 2. Handle early returns - m_unifiedReturn = CompilerUtils::unifyReturns(function, *m_builder); + m_unifiedReturn = compilerutils::unifyReturns(function, *m_builder); m_builder->SetInsertPoint(m_unifiedReturn); // 3. Store gl_Position, gl_ClipDistance, gl_FrontColor and gl_TextureCoord[0] in the struct diff --git a/llpc/lowering/ProcessGfxRuntimeLibrary.cpp b/llpc/lowering/ProcessGfxRuntimeLibrary.cpp index c4906e94cc..08dad6668c 100644 --- a/llpc/lowering/ProcessGfxRuntimeLibrary.cpp +++ b/llpc/lowering/ProcessGfxRuntimeLibrary.cpp @@ -91,7 +91,7 @@ void ProcessGfxRuntimeLibrary::processLibraryFunction(Function *&func) { continue; promotionMask.set(argId); } - func = CompilerUtils::promotePointerArguments(func, promotionMask); + func = compilerutils::promotePointerArguments(func, promotionMask); return; } diff --git a/llpc/lowering/ProcessGpuRtLibrary.cpp b/llpc/lowering/ProcessGpuRtLibrary.cpp index b330dadd08..7d85d75434 100644 --- a/llpc/lowering/ProcessGpuRtLibrary.cpp +++ b/llpc/lowering/ProcessGpuRtLibrary.cpp @@ -132,7 +132,7 @@ PreservedAnalyses ProcessGpuRtLibrary::run(Module &module, ModuleAnalysisManager Function *func = argPromotionsFunc.first; if (func->getLinkage() == GlobalValue::InternalLinkage) continue; - CompilerUtils::promotePointerArguments(func, argPromotionsFunc.second); + compilerutils::promotePointerArguments(func, argPromotionsFunc.second); } // Process ray-tracing (i.e. 
non-rayQuery) functions in a separate loop; processLibraryFunction() may do @@ -205,6 +205,7 @@ ProcessGpuRtLibrary::LibraryFunctionTable::LibraryFunctionTable() { m_libFuncPtrs["AmdTraceRayInitStaticId"] = &ProcessGpuRtLibrary::createInitStaticId; #endif m_libFuncPtrs["AmdTraceRayGetKnownSetRayFlags"] = &ProcessGpuRtLibrary::createGetKnownSetRayFlags; + m_libFuncPtrs["AmdTraceRayMakePC"] = &ProcessGpuRtLibrary::createMakePc; m_libFuncPtrs["AmdTraceRayGetKnownUnsetRayFlags"] = &ProcessGpuRtLibrary::createGetKnownUnsetRayFlags; m_libFuncPtrs["_AmdContStackAlloc"] = &ProcessGpuRtLibrary::createContStackAlloc; m_libFuncPtrs["_AmdContStackFree"] = &ProcessGpuRtLibrary::createContStackFree; @@ -250,7 +251,7 @@ bool ProcessGpuRtLibrary::processLibraryFunction(Function *&func) { // The intrinsic handling require first argument to be a pointer, the rest to be values. SmallBitVector promotionMask(func->arg_size(), true); promotionMask.reset(0); - auto newFunc = CompilerUtils::promotePointerArguments(func, promotionMask); + auto newFunc = compilerutils::promotePointerArguments(func, promotionMask); if (funcName.starts_with("_AmdValueGetI32")) ContHelper::handleValueGetI32(*newFunc, *m_builder); else @@ -527,16 +528,17 @@ void ProcessGpuRtLibrary::createConvertF32toF16WithRoundingMode(Function *func, // // @param func : The function to create void ProcessGpuRtLibrary::createIntersectBvh(Function *func) { - assert(m_gpurtKey.bvhResDesc.size() != 0); - if (m_gpurtKey.bvhResDesc.size() < 4) + if (m_gpurtKey.bvhResDesc.size() < 4) { + m_builder->CreateRet(PoisonValue::get(func->getReturnType())); return; + } #if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 33 - // Ray tracing utility function: AmdExtD3DShaderIntrinsics_IntersectBvhNode - // uint4 AmdExtD3DShaderIntrinsics_IntersectBvhNode( + // Ray tracing utility function: AmdExtD3DShaderIntrinsics_IntersectBvhNode + // uint4 AmdExtD3DShaderIntrinsics_IntersectBvhNode( #else - // Ray tracing utility function: 
AmdExtD3DShaderIntrinsics_IntersectInternal - // uint4 AmdExtD3DShaderIntrinsics_IntersectInternal( + // Ray tracing utility function: AmdExtD3DShaderIntrinsics_IntersectInternal + // uint4 AmdExtD3DShaderIntrinsics_IntersectInternal( #endif // in uint2 address, // in float ray_extent, @@ -840,6 +842,15 @@ void ProcessGpuRtLibrary::createGetKnownUnsetRayFlags(llvm::Function *func) { m_builder->CreateRet(m_builder->create()); } +// ===================================================================================================================== +// Fill in function to make a trace ray PC +// +// @param func : The function to create +void ProcessGpuRtLibrary::createMakePc(llvm::Function *func) { + Value *addr32 = m_builder->CreateLoad(m_builder->getInt32Ty(), func->getArg(0)); + m_builder->CreateRet(m_builder->create(func->getReturnType(), addr32)); +} + // ===================================================================================================================== // Fill in function of AmdExtDispatchThreadIdFlat // diff --git a/llpc/lowering/ProcessGpuRtLibrary.h b/llpc/lowering/ProcessGpuRtLibrary.h index 117e7d8f18..d6c6765fa7 100644 --- a/llpc/lowering/ProcessGpuRtLibrary.h +++ b/llpc/lowering/ProcessGpuRtLibrary.h @@ -133,6 +133,7 @@ class ProcessGpuRtLibrary : public SpirvLower, public llvm::PassInfoMixin %{{.*}}, i32 0 -; SHADERTEST: %{{.*}} = extractelement <2 x float> %{{.*}}, i32 1 +; SHADERTEST: %{{.*}} = extractelement <2 x float> %{{.*}}, i64 0 +; SHADERTEST: %{{.*}} = extractelement <2 x float> %{{.*}}, i64 1 ; SHADERTEST-LABEL: {{^// LLPC}} FE lowering results ; SHADERTEST: [[VEC1:%.*]] = shufflevector <3 x float> %{{.*}}, <3 x float> {{undef|poison}}, <4 x i32> ; SHADERTEST: [[VEC2:%.*]] = shufflevector <4 x float> , <4 x float> [[VEC1]], <4 x i32> diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2.frag index b15554694e..58533e07c9 100644 --- 
a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2_lit.frag index 96ec6a174b..8f2f9ee3d5 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec2xDmat4X2_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3.frag index e34ff888b6..2dde5ae7ce 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3_lit.frag index 74785d47bb..9d1eab1fb3 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat2X3_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3.frag index 8b769a2f05..950ce2ffe1 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3_lit.frag index 88a68ea1d1..09cc7a7b26 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestDvec3xDmat4X3_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2.frag index 80b0f68bdd..eb3fb2d249 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2_lit.frag index a4e88c6443..49c593ba32 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec2xMat3X2_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4.frag index 8808387f2a..9ffc0927bd 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 colorIn1; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4_lit.frag index 5b3697cf3f..e8352b4bc6 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesMatrix_TestVec4xMat4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 colorIn1; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4.frag index d6c5d54800..16e4126acd 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4_lit.frag index 4c71b65768..67d1da11e5 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDoublexDvec4_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble.frag index 3853306a6f..a2840be73f 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble_lit.frag index cd626e9ab3..cbbc6fea6e 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestDvec4xDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt.frag index 1218841a9e..b8e06ffcf7 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt_lit.frag index ce13cdc383..34e39bdab4 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestIvec2xInt_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint.frag index b3b8c508c1..d69f109b29 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint_lit.frag index aab6f7f2f8..090f5f50db 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestUvec4xUint_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat.frag index bac5a87862..c701fbe695 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat_lit.frag b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat_lit.frag index ca585f5f54..56e74d8514 100644 --- a/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat_lit.frag +++ b/llpc/test/shaderdb/core/OpVectorTimesScalar_TestVec3xConstFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/core/OverrideThreadGroupSize16X16X1.spvasm b/llpc/test/shaderdb/core/OverrideThreadGroupSize16X16X1.spvasm index b841769578..bbfc62e3a8 100644 --- a/llpc/test/shaderdb/core/OverrideThreadGroupSize16X16X1.spvasm +++ b/llpc/test/shaderdb/core/OverrideThreadGroupSize16X16X1.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s \ ; RUN: --override-threadGroupSizeX=16 --override-threadGroupSizeY=16 --override-threadGroupSizeZ=1 \ diff --git a/llpc/test/shaderdb/core/OverrideThreadGroupSize8X8X1.spvasm b/llpc/test/shaderdb/core/OverrideThreadGroupSize8X8X1.spvasm index da345746bc..c88aeeb4fd 100644 --- a/llpc/test/shaderdb/core/OverrideThreadGroupSize8X8X1.spvasm +++ b/llpc/test/shaderdb/core/OverrideThreadGroupSize8X8X1.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s \ ; RUN: --override-threadGroupSizeX=8 --override-threadGroupSizeY=8 --override-threadGroupSizeZ=1 \ diff --git a/llpc/test/shaderdb/core/ShaderRetInLoop.spvasm b/llpc/test/shaderdb/core/ShaderRetInLoop.spvasm index f5d6fdeeb2..3f1fb6016f 100644 --- a/llpc/test/shaderdb/core/ShaderRetInLoop.spvasm +++ b/llpc/test/shaderdb/core/ShaderRetInLoop.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert b/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert index b150529084..7a54086e08 100644 --- a/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert +++ b/llpc/test/shaderdb/core/TestEnableImplicitInvariantExports.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // If implicit invariant marking is allowed for instructions contributing to gl_Position exports, the // fast math flag is disabled for these instructions. This occurs if invariance is expected even if no // invariance flag is being used in SPIR-V. Enabling the FMF can sometimes break rendering with FMA diff --git a/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag b/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag index 093ef500b6..b4a81ae27c 100644 --- a/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag +++ b/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // Test not forcing NURI // BEGIN_SHADERTEST // RUN: amdllpc -v %gfxip %s --force-non-uniform-resource-index-stage-mask=0x00000000 | FileCheck -check-prefix=NOTFORCENURITEST %s diff --git a/llpc/test/shaderdb/core/TestNoContractBackwardPropagation.spvasm b/llpc/test/shaderdb/core/TestNoContractBackwardPropagation.spvasm index 09b50df9d1..75bde2a175 100644 --- a/llpc/test/shaderdb/core/TestNoContractBackwardPropagation.spvasm +++ b/llpc/test/shaderdb/core/TestNoContractBackwardPropagation.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -backward-propagate-no-contract -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/core/TestNoContractForwardPropagation.spvasm b/llpc/test/shaderdb/core/TestNoContractForwardPropagation.spvasm index d0d76748ee..571f06c76b 100644 --- a/llpc/test/shaderdb/core/TestNoContractForwardPropagation.spvasm +++ b/llpc/test/shaderdb/core/TestNoContractForwardPropagation.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -forward-propagate-no-contract -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/core/TestReverseThreadGroup.comp b/llpc/test/shaderdb/core/TestReverseThreadGroup.comp index f0a5b05fc9..8a0fba5e14 100644 --- a/llpc/test/shaderdb/core/TestReverseThreadGroup.comp +++ b/llpc/test/shaderdb/core/TestReverseThreadGroup.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 layout(local_size_x = 16, local_size_y = 16) in; diff --git a/llpc/test/shaderdb/core/TestThreadGroupSwizzle.comp b/llpc/test/shaderdb/core/TestThreadGroupSwizzle.comp index ed55d67b43..485280979d 100644 --- a/llpc/test/shaderdb/core/TestThreadGroupSwizzle.comp +++ b/llpc/test/shaderdb/core/TestThreadGroupSwizzle.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 layout(local_size_x = 16, local_size_y = 16) in; diff --git a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert index e718a33126..6e135b4d8b 100644 --- a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert +++ b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function-signature --check-globals // RUN: amdllpc -o - -gfxip 10.1 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s #version 450 diff --git a/llpc/test/shaderdb/debug_info/FunctionCall.pipe b/llpc/test/shaderdb/debug_info/FunctionCall.pipe index f551e320eb..98fc73a834 100644 --- a/llpc/test/shaderdb/debug_info/FunctionCall.pipe +++ b/llpc/test/shaderdb/debug_info/FunctionCall.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -trim-debug-info=false -filetype=asm -o - -gfxip 11.0 %s | FileCheck -check-prefixes=NOTRIM %s ; RUN: amdllpc -trim-debug-info=true -filetype=asm -o - -gfxip 11.0 %s | FileCheck -check-prefixes=TRIM %s diff --git a/llpc/test/shaderdb/debug_info/NonSemanticShaderDebug.pipe b/llpc/test/shaderdb/debug_info/NonSemanticShaderDebug.pipe index 5ad5647888..173a55e4ce 100644 --- a/llpc/test/shaderdb/debug_info/NonSemanticShaderDebug.pipe +++ b/llpc/test/shaderdb/debug_info/NonSemanticShaderDebug.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals ; RUN: amdllpc -trim-debug-info=false -v -gfxip 10.1 %s | FileCheck -check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/debug_info/PipelineGsTess_TestVsTesGsMergeShader.pipe b/llpc/test/shaderdb/debug_info/PipelineGsTess_TestVsTesGsMergeShader.pipe index 3230d03602..5e8918c027 100644 --- a/llpc/test/shaderdb/debug_info/PipelineGsTess_TestVsTesGsMergeShader.pipe +++ b/llpc/test/shaderdb/debug_info/PipelineGsTess_TestVsTesGsMergeShader.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals ; BEGIN_SHADERTEST ; RUN: amdllpc --print-after=lgc-prepare-pipeline-abi --enable-implicit-invariant-exports=1 -trim-debug-info=false 2>&1 %s | FileCheck -check-prefix=SHADERTEST %s @@ -793,6 +801,7 @@ attribute[2].offset = 0 ; SHADERTEST-NEXT: [[TMP46:%.*]] = extractelement <1 x float> [[TMP43]], i64 0, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP46]], i64 3, !dbg [[DBG116]] ; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.buffer.store.v4f32{{(.v4i32)?}}(<4 x float> [[TMP47]], <4 x i32> [[TMP10]], i32 [[TMP44]], i32 [[TFBUFFERBASE:%.*]], i32 1), !dbg [[DBG116]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.s.setreg(i32 6401, i32 3), !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP48:%.*]] = extractelement <3 x float> [[TMP38]], i64 0, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP49:%.*]] = extractelement <3 x float> [[TMP38]], i64 1, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP50:%.*]] = call float @llvm.minnum.f32(float [[TMP48]], float [[TMP49]]), !dbg [[DBG116]] @@ -811,10 +820,10 @@ attribute[2].offset = 0 ; SHADERTEST-NEXT: [[TMP61:%.*]] = add i32 [[TMP60]], 0, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP62:%.*]] = add i32 [[TMP57]], 0, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr addrspace(3) @Lds.HS, i32 [[TMP62]], !dbg [[DBG116]] -; SHADERTEST-NEXT: [[TMP64:%.*]] = load <4 x float>, ptr addrspace(3) [[TMP63]], align 4, !dbg [[DBG116]] +; SHADERTEST-NEXT: [[TMP64:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP63]], align 4, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP65:%.*]] = add i32 [[TMP61]], 0, !dbg [[DBG116]] ; SHADERTEST-NEXT: [[TMP66:%.*]] = mul i32 [[TMP65]], 4, !dbg [[DBG116]] -; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.v4f32{{(.v4i32)?}}(<4 x float> 
[[TMP64]], <4 x i32> [[TMP8]], i32 [[TMP66]], i32 [[OFFCHIPLDSBASE:%.*]], i32 77, i32 1), !dbg [[DBG116]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.tbuffer.store.v4i32{{(.v4i32)?}}(<4 x i32> [[TMP64]], <4 x i32> [[TMP8]], i32 [[TMP66]], i32 [[OFFCHIPLDSBASE:%.*]], i32 77, i32 1), !dbg [[DBG116]] ; SHADERTEST-NEXT: br label [[DOTWRITEHSOUTPUTS_ENDIF]], !dbg [[DBG116]] ; SHADERTEST: .writeHsOutputs.endif: ; SHADERTEST-NEXT: ret void, !dbg [[DBG116]] @@ -846,10 +855,10 @@ attribute[2].offset = 0 ; SHADERTEST-NEXT: [[DOTI29:%.*]] = extractelement <4 x float> [[BC57]], i64 2, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[BC58:%.*]] = bitcast <4 x i32> [[TMP12]] to <4 x float>, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[DOTI310:%.*]] = extractelement <4 x float> [[BC58]], i64 3, !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE_I0:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDX]], [[DOTI07]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE_I1:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDX]], [[DOTI18]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE_I2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDX]], [[DOTI29]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE_I3:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDX]], [[DOTI310]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE_I0:%.*]] = fmul nnan nsz afn float [[TESSCOORDX]], [[DOTI07]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE_I1:%.*]] = fmul nnan nsz afn float [[TESSCOORDX]], [[DOTI18]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE_I2:%.*]] = fmul nnan nsz afn float [[TESSCOORDX]], [[DOTI29]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE_I3:%.*]] = fmul nnan nsz afn float [[TESSCOORDX]], [[DOTI310]], !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], 16, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> [[TMP8]], i32 [[TMP13]], i32 [[OFFCHIPLDSBASE]], i32 immarg 77, i32 immarg 5) #[[ATTR9]], 
!dbg [[DBG122]] ; SHADERTEST-NEXT: [[BC59:%.*]] = bitcast <4 x i32> [[TMP14]] to <4 x float>, !dbg [[DBG122]] @@ -860,14 +869,14 @@ attribute[2].offset = 0 ; SHADERTEST-NEXT: [[DOTI216:%.*]] = extractelement <4 x float> [[BC61]], i64 2, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[BC62:%.*]] = bitcast <4 x i32> [[TMP14]] to <4 x float>, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[DOTI318:%.*]] = extractelement <4 x float> [[BC62]], i64 3, !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE2_I0:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDY]], [[DOTI012]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE2_I1:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDY]], [[DOTI114]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE2_I2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDY]], [[DOTI216]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE2_I3:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TESSCOORDY]], [[DOTI318]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI019:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SCALE2_I0]], [[SCALE_I0]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI120:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SCALE2_I1]], [[SCALE_I1]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI221:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SCALE2_I2]], [[SCALE_I2]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI322:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[SCALE2_I3]], [[SCALE_I3]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE2_I0:%.*]] = fmul nnan nsz afn float [[TESSCOORDY]], [[DOTI012]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE2_I1:%.*]] = fmul nnan nsz afn float [[TESSCOORDY]], [[DOTI114]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE2_I2:%.*]] = fmul nnan nsz afn float [[TESSCOORDY]], [[DOTI216]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE2_I3:%.*]] = fmul nnan nsz afn float [[TESSCOORDY]], [[DOTI318]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI019:%.*]] = fadd nnan nsz afn float [[SCALE_I0]], 
[[SCALE2_I0]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI120:%.*]] = fadd nnan nsz afn float [[SCALE_I1]], [[SCALE2_I1]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI221:%.*]] = fadd nnan nsz afn float [[SCALE_I2]], [[SCALE2_I2]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI322:%.*]] = fadd nnan nsz afn float [[SCALE_I3]], [[SCALE2_I3]], !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP15:%.*]] = add i32 [[TMP11]], 32, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> [[TMP8]], i32 [[TMP15]], i32 [[OFFCHIPLDSBASE]], i32 immarg 77, i32 immarg 5) #[[ATTR9]], !dbg [[DBG122]] ; SHADERTEST-NEXT: [[BC63:%.*]] = bitcast <4 x i32> [[TMP16]] to <4 x float>, !dbg [[DBG122]] @@ -878,14 +887,14 @@ attribute[2].offset = 0 ; SHADERTEST-NEXT: [[DOTI228:%.*]] = extractelement <4 x float> [[BC65]], i64 2, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[BC66:%.*]] = bitcast <4 x i32> [[TMP16]] to <4 x float>, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[DOTI330:%.*]] = extractelement <4 x float> [[BC66]], i64 3, !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE4_I0:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], [[DOTI024]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE4_I1:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], [[DOTI126]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE4_I2:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], [[DOTI228]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[SCALE4_I3:%.*]] = fmul reassoc nnan nsz arcp contract afn float [[TMP1]], [[DOTI330]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI031:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[DOTI019]], [[SCALE4_I0]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI132:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[DOTI120]], [[SCALE4_I1]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI233:%.*]] = fadd reassoc nnan nsz arcp contract afn float [[DOTI221]], [[SCALE4_I2]], !dbg [[DBG122]] -; SHADERTEST-NEXT: [[DOTI334:%.*]] = fadd reassoc nnan nsz 
arcp contract afn float [[DOTI322]], [[SCALE4_I3]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE4_I0:%.*]] = fmul nnan nsz afn float [[TMP1]], [[DOTI024]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE4_I1:%.*]] = fmul nnan nsz afn float [[TMP1]], [[DOTI126]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE4_I2:%.*]] = fmul nnan nsz afn float [[TMP1]], [[DOTI228]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[SCALE4_I3:%.*]] = fmul nnan nsz afn float [[TMP1]], [[DOTI330]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI031:%.*]] = fadd nnan nsz afn float [[DOTI019]], [[SCALE4_I0]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI132:%.*]] = fadd nnan nsz afn float [[DOTI120]], [[SCALE4_I1]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI233:%.*]] = fadd nnan nsz afn float [[DOTI221]], [[SCALE4_I2]], !dbg [[DBG122]] +; SHADERTEST-NEXT: [[DOTI334:%.*]] = fadd nnan nsz afn float [[DOTI322]], [[SCALE4_I3]], !dbg [[DBG122]] ; SHADERTEST-NEXT: [[DOTIDX:%.*]] = mul i32 [[TMP10]], 20, !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(3) @Lds.GS, i32 [[DOTIDX]], !dbg [[DBG122]] ; SHADERTEST-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP17]], i32 [[ESGSOFFSET:%.*]], !dbg [[DBG122]] diff --git a/llpc/test/shaderdb/debug_info/PipelineGs_TestVsGSMergeShader.pipe b/llpc/test/shaderdb/debug_info/PipelineGs_TestVsGSMergeShader.pipe index 3146e8b871..0ed8281ba8 100644 --- a/llpc/test/shaderdb/debug_info/PipelineGs_TestVsGSMergeShader.pipe +++ b/llpc/test/shaderdb/debug_info/PipelineGs_TestVsGSMergeShader.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals ; RUN: amdllpc --print-after=lgc-prepare-pipeline-abi -trim-debug-info=false 2>&1 %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugCompilationUnit.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugCompilationUnit.spvasm index 3a0e09c111..928f5ec6bf 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugCompilationUnit.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugCompilationUnit.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugDeclare.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugDeclare.spvasm index 5ddceeee37..6b38fbcaaf 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugDeclare.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugDeclare.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugExpression.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugExpression.spvasm index 7a3b6e9da9..bcefb51339 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugExpression.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugExpression.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugFunctionDeclaration.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugFunctionDeclaration.spvasm index d131cceaeb..b19735f384 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugFunctionDeclaration.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugFunctionDeclaration.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugLexicalBlock.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugLexicalBlock.spvasm index 09485101ff..63cbdcff41 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugLexicalBlock.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugLexicalBlock.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugSourceNoText.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugSourceNoText.spvasm index 33d755779e..17749111bc 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugSourceNoText.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugSourceNoText.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeArray.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeArray.spvasm index 23f5691c97..2fabdf6c2b 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeArray.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeArray.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeEnum.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeEnum.spvasm index d0d6635eb3..89e73bd791 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeEnum.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeEnum.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeFunction.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeFunction.spvasm index ab2ca5182e..516744c97c 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeFunction.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeFunction.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeInheritance.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeInheritance.spvasm index f784ccbfc2..b2ad9ef440 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeInheritance.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeInheritance.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypePointer.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypePointer.spvasm index ec793e6dac..1f0fe2c693 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypePointer.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypePointer.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeQualifier.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeQualifier.spvasm index 0c87341b55..2474ce9f92 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeQualifier.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeQualifier.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeVector.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeVector.spvasm index d4b2589e78..1775d050d0 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeVector.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypeVector.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypedef.spvasm b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypedef.spvasm index b481c17e16..71ff4384e5 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypedef.spvasm +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_DebugTypedef.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -trim-debug-info=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestFsBasic.frag b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestFsBasic.frag index d2c8fa1284..7c42e46e77 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestFsBasic.frag +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestFsBasic.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in flat int i0; layout(location = 1) in float i1; diff --git a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestVsBasic.vert b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestVsBasic.vert index 0458228259..63f7146703 100644 --- a/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestVsBasic.vert +++ b/llpc/test/shaderdb/debug_info/avoid/DebugInfo_TestVsBasic.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 f4; layout(location = 1) in int i1; diff --git a/llpc/test/shaderdb/error_reporting/GlslBadEntryPointName.frag b/llpc/test/shaderdb/error_reporting/GlslBadEntryPointName.frag index 6411b34d5d..aff1e28bf6 100644 --- a/llpc/test/shaderdb/error_reporting/GlslBadEntryPointName.frag +++ b/llpc/test/shaderdb/error_reporting/GlslBadEntryPointName.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + /* ; BEGIN_SHADERTEST ; RUN: not amdllpc %gfxip %s,mainFs \ diff --git a/llpc/test/shaderdb/error_reporting/GlslDuplicateStage.frag b/llpc/test/shaderdb/error_reporting/GlslDuplicateStage.frag index 2f1be52fc2..d12b8ad6bc 100644 --- a/llpc/test/shaderdb/error_reporting/GlslDuplicateStage.frag +++ b/llpc/test/shaderdb/error_reporting/GlslDuplicateStage.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that an error is produced when the same shader stage is provided twice. 
/* ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/InvalidGfxip.frag b/llpc/test/shaderdb/error_reporting/InvalidGfxip.frag index 17d364d829..44544402c0 100644 --- a/llpc/test/shaderdb/error_reporting/InvalidGfxip.frag +++ b/llpc/test/shaderdb/error_reporting/InvalidGfxip.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that an invalid gfxip generates the appropriate error /* ; RUN: not amdllpc -v -gfxip=1.2.3 %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/error_reporting/LlvmMissingShaderStage.ll b/llpc/test/shaderdb/error_reporting/LlvmMissingShaderStage.ll index 1ef02e3f26..67f40091fb 100644 --- a/llpc/test/shaderdb/error_reporting/LlvmMissingShaderStage.ll +++ b/llpc/test/shaderdb/error_reporting/LlvmMissingShaderStage.ll @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Check that an error is produced when valid LLVM IR is passed but is not a shader. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/LlvmVerificationFailure.ll b/llpc/test/shaderdb/error_reporting/LlvmVerificationFailure.ll index 51303caeb9..4cbcf35532 100644 --- a/llpc/test/shaderdb/error_reporting/LlvmVerificationFailure.ll +++ b/llpc/test/shaderdb/error_reporting/LlvmVerificationFailure.ll @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Check that an error is produced when parsable but invalid LLVM IR is passed. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/MultipleThreadsVerboseOutput.spvasm b/llpc/test/shaderdb/error_reporting/MultipleThreadsVerboseOutput.spvasm index 35e0c23af2..bcfccd6708 100644 --- a/llpc/test/shaderdb/error_reporting/MultipleThreadsVerboseOutput.spvasm +++ b/llpc/test/shaderdb/error_reporting/MultipleThreadsVerboseOutput.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when we request to use multiple threads and verbose output. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/SpirvBadEntryPoint.spvasm b/llpc/test/shaderdb/error_reporting/SpirvBadEntryPoint.spvasm index b7e6c5bb28..8236e1ae96 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvBadEntryPoint.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvBadEntryPoint.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when the specified entry point is not a valid SPIR-V entry point in the input. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/SpirvDuplicateStage.spvasm b/llpc/test/shaderdb/error_reporting/SpirvDuplicateStage.spvasm index 25f3a26c49..1422570366 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvDuplicateStage.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvDuplicateStage.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when the same shader stage is provided twice. 
; BEGIN_SHADERTEST_ST diff --git a/llpc/test/shaderdb/error_reporting/SpirvInvalidOpcode.spvasm b/llpc/test/shaderdb/error_reporting/SpirvInvalidOpcode.spvasm index 5f9ea0f1fd..a43a91eb6a 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvInvalidOpcode.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvInvalidOpcode.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when bad SPIR-V is passed and it is not possible to assemble it. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/SpirvMissingEntryPoint.spvasm b/llpc/test/shaderdb/error_reporting/SpirvMissingEntryPoint.spvasm index 9c84b5a9c2..904111b75e 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvMissingEntryPoint.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvMissingEntryPoint.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when the specified entry point is empty. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/SpirvValidationFailure.spvasm b/llpc/test/shaderdb/error_reporting/SpirvValidationFailure.spvasm index 6556be379c..bf14e7005b 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvValidationFailure.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvValidationFailure.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when invalid SPIR-V is passed and validation is enabled. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/SpirvWildcardAndEntryPoint.spvasm b/llpc/test/shaderdb/error_reporting/SpirvWildcardAndEntryPoint.spvasm index ff8c40f336..c8954febef 100644 --- a/llpc/test/shaderdb/error_reporting/SpirvWildcardAndEntryPoint.spvasm +++ b/llpc/test/shaderdb/error_reporting/SpirvWildcardAndEntryPoint.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that an error is produced when wildcards and entrypoint are specified ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/error_reporting/UnsupportedMCPUOption.ll b/llpc/test/shaderdb/error_reporting/UnsupportedMCPUOption.ll index 7f4e9c2b69..9af2b2f283 100644 --- a/llpc/test/shaderdb/error_reporting/UnsupportedMCPUOption.ll +++ b/llpc/test/shaderdb/error_reporting/UnsupportedMCPUOption.ll @@ -1,3 +1,29 @@ + +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Check that an error is produced when the --mcpu flag is used with amdllpc. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFpRoundMode.spvasm b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFpRoundMode.spvasm index 71a21a1072..c77721752c 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFpRoundMode.spvasm +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFpRoundMode.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag index 626a842f63..9ff5d4466e 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestFsInput_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom index 679c3de0c1..1f9c30ade2 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsInput_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsOutput_lit.geom b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsOutput_lit.geom index 32d916b252..cf1c2083ff 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsOutput_lit.geom +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestGsOutput_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc index 3522e44693..627e824605 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsInput_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsOutput_lit.tesc b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsOutput_lit.tesc index 6c961adab6..f165e1d10f 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsOutput_lit.tesc +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTcsOutput_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese index 5d4f3409e9..d6ed9c0865 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesInput_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesOutput_lit.tese b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesOutput_lit.tese index 5fad524b4e..095b9a34bf 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesOutput_lit.tese +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestTesOutput_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert index 225820996d..643fe38301 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsInput_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsOutput_lit.vert b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsOutput_lit.vert index 11b10ba14f..70560cdd3f 100644 --- a/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsOutput_lit.vert +++ b/llpc/test/shaderdb/extensions/Ext16bitStorage_TestVsOutput_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_int16: enable diff --git a/llpc/test/shaderdb/extensions/ExtBufferReference_TestPointerCasting.frag b/llpc/test/shaderdb/extensions/ExtBufferReference_TestPointerCasting.frag index b7003fc6fa..a32752bb44 100644 --- a/llpc/test/shaderdb/extensions/ExtBufferReference_TestPointerCasting.frag +++ b/llpc/test/shaderdb/extensions/ExtBufferReference_TestPointerCasting.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable #extension GL_EXT_buffer_reference : enable #extension GL_EXT_buffer_reference_uvec2 : enable diff --git a/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestDemote.frag b/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestDemote.frag index f2eafbef1f..815c3d6820 100644 --- a/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestDemote.frag +++ b/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestDemote.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestIsHelperInvocation.frag b/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestIsHelperInvocation.frag index 4eb2042f13..54deafe6f3 100644 --- a/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestIsHelperInvocation.frag +++ b/llpc/test/shaderdb/extensions/ExtDemoteToHelper_TestIsHelperInvocation.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestComputeShader_lit.comp b/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestComputeShader_lit.comp index c167b9bb76..099e425e01 100644 --- a/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestComputeShader_lit.comp +++ b/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestComputeShader_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_device_group : enable diff --git a/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestGraphicsShader_lit.vert b/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestGraphicsShader_lit.vert index cd95941965..9a22de3e6d 100644 --- a/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestGraphicsShader_lit.vert +++ b/llpc/test/shaderdb/extensions/ExtDeviceGroup_TestGraphicsShader_lit.vert @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_EXT_device_group : enable void main() diff --git a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestBuiltIn_lit.frag b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestBuiltIn_lit.frag index 43ac3902ba..9e78ac2e6d 100644 --- a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestBuiltIn_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestBuiltIn_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_shader_explicit_vertex_parameter: enable diff --git a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag index 9f55fa4d63..5e65ef5aef 100644 --- a/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtExplicitVertexParam_TestInterpFunc_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_shader_explicit_vertex_parameter: enable diff --git a/llpc/test/shaderdb/extensions/ExtFragMask_TestFragFetch_lit.frag b/llpc/test/shaderdb/extensions/ExtFragMask_TestFragFetch_lit.frag index 00373da6d2..a8e9c4bc13 100644 --- a/llpc/test/shaderdb/extensions/ExtFragMask_TestFragFetch_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtFragMask_TestFragFetch_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_shader_fragment_mask: enable diff --git a/llpc/test/shaderdb/extensions/ExtGcnShader_TestBuiltInFunc_lit.frag b/llpc/test/shaderdb/extensions/ExtGcnShader_TestBuiltInFunc_lit.frag index 916688aa07..a18d014359 100644 --- a/llpc/test/shaderdb/extensions/ExtGcnShader_TestBuiltInFunc_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtGcnShader_TestBuiltInFunc_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gcn_shader: enable diff --git a/llpc/test/shaderdb/extensions/ExtGoogleHlslFunc_TestGeneral.spvasm b/llpc/test/shaderdb/extensions/ExtGoogleHlslFunc_TestGeneral.spvasm index cbe412dc75..9ea1c7e7fb 100644 --- a/llpc/test/shaderdb/extensions/ExtGoogleHlslFunc_TestGeneral.spvasm +++ b/llpc/test/shaderdb/extensions/ExtGoogleHlslFunc_TestGeneral.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe b/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe index 0f31a3d6cd..ec680ad9aa 100644 --- a/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe +++ b/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticAMD_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticAMD_lit.frag index 6a7ddaed58..34544dcfa8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticAMD_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticAMD_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64: enable #extension GL_AMD_shader_ballot: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticData16AMD.frag b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticData16AMD.frag index 5640c4c6af..b1ece1a1b8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticData16AMD.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestArithmeticData16AMD.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_shader_ballot: enable #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestGeneral_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestGeneral_lit.frag index 6a97175545..8f89bf0404 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestGeneral_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_shader_ballot: enable #extension GL_ARB_gpu_shader_int64: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestMiscAMD_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestMiscAMD_lit.frag index a568289145..4ccaa85087 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestMiscAMD_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestMiscAMD_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64: enable #extension GL_ARB_shader_ballot: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestSwizzleAMD_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestSwizzleAMD_lit.frag index 55635cf5a3..5b1944c9e0 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderBallot_TestSwizzleAMD_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderBallot_TestSwizzleAMD_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_shader_ballot: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestFetchData16.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestFetchData16.frag index 448c9f1f67..925803a440 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestFetchData16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestFetchData16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float_fetch: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestGatherData16.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestGatherData16.frag index 4f51a84643..d429e53e60 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestGatherData16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestGatherData16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float_fetch: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestImagingData16.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestImagingData16.frag index 3b6f965fc3..da52e76184 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestImagingData16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestImagingData16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float_fetch: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSamplingData16.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSamplingData16.frag index fc8de8cb54..1d989e8279 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSamplingData16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSamplingData16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float_fetch: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSubpassSamplingData16.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSubpassSamplingData16.frag index 5a73dca4a9..5e81292ec4 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSubpassSamplingData16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16Fetch_TestSubpassSamplingData16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_gpu_shader_half_float_fetch: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestAngleTrigFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestAngleTrigFuncs_lit.frag index 64016478cb..7edf564670 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestAngleTrigFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestAngleTrigFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestArithmeticOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestArithmeticOp_lit.frag index cfff330b65..3c05f4cad8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestArithmeticOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestArithmeticOp_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestCommonFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestCommonFuncs_lit.frag index f564b06e4d..1a393fbaa3 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestCommonFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestCommonFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestDerivFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestDerivFuncs_lit.frag index cb1359dc5c..0b8a32ceba 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestDerivFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestDerivFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestExponentialFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestExponentialFuncs_lit.frag index cf4b34f049..a3dfaa413f 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestExponentialFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestExponentialFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestGeometryFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestGeometryFuncs_lit.frag index aa9cf609b1..0a2aa3c23a 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestGeometryFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestGeometryFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag index 9153d1dd62..8cfe29a0e4 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestInterpFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestMatrixFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestMatrixFuncs_lit.frag index 699f089d89..b8adb5cfb0 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestMatrixFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestMatrixFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestPackUnpackFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestPackUnpackFuncs_lit.frag index 12679609db..92acd218b6 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestPackUnpackFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestPackUnpackFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag index f8861d163b..e9ab0bd31b 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestRelationalFuncs_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -gfxip 10.3 -o - %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestSpecConst.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestSpecConst.frag index 9be839635a..d1fd244360 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestSpecConst.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestSpecConst.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlock.comp b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlock.comp index 227e244cab..a36c997281 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlock.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlock.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlockRowMajorMatrix.comp b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlockRowMajorMatrix.comp index 799becc7f9..acbb2a69e8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlockRowMajorMatrix.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestStorageBlockRowMajorMatrix.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestTrinaryMinMaxFuncs.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestTrinaryMinMaxFuncs.frag index 1c39150d54..6a85240fc3 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestTrinaryMinMaxFuncs.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestTrinaryMinMaxFuncs.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_shader_trinary_minmax: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestVectorMatrixOp.frag b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestVectorMatrixOp.frag index f69b0e144a..91b0ea1610 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestVectorMatrixOp.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderFloat16_TestVectorMatrixOp.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithInt16.frag b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithInt16.frag index 1a9788ea1e..3fd90b81dd 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithInt16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithInt16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_int16 : enable layout(location = 0) out int16_t oColor; diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithUint16.frag b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithUint16.frag index 258faa8825..348199da26 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithUint16.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBasicArithUint16.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_int16 : enable layout(location = 0) out uint16_t oColor; diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBitwiseOp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBitwiseOp.comp index e73a84b738..234e2e62f3 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBitwiseOp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBitwiseOp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncAbs.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncAbs.comp index fc219ed7e2..d977301bbb 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncAbs.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncAbs.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncBitConv.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncBitConv.comp index 1facc3d23b..5397f9cec6 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncBitConv.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncBitConv.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncFrexp.frag b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncFrexp.frag index 7d5de59f9e..deb3594750 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncFrexp.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncFrexp.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float : enable #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMinMaxClamp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMinMaxClamp.comp index bcabe56ed4..73dbbb298a 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMinMaxClamp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMinMaxClamp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMix.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMix.comp index 8e2b0a0822..5a004c2b87 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMix.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncMix.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncPack.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncPack.comp index 3167776c06..51bbac16ea 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncPack.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncPack.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncSign.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncSign.comp index a84e38e09a..0a389a9fb6 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncSign.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncSign.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncUnpack.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncUnpack.comp index c33bfd8a53..071bb8a8d9 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncUnpack.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncUnpack.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncVectorCmp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncVectorCmp.comp index db1be07738..f2321c6577 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncVectorCmp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestBuiltInFuncVectorCmp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestNegate.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestNegate.comp index fbe42c5c73..c06ba10a87 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestNegate.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestNegate.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestScalarCmp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestScalarCmp.comp index 1018bf4a3d..ee886446cf 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestScalarCmp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestScalarCmp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestShiftOp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestShiftOp.comp index 4446b619fe..741c73def0 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestShiftOp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestShiftOp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestSpecConst.frag b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestSpecConst.frag index 63cf551a2b..8ee67b34dd 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestSpecConst.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestSpecConst.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestStorageBlockAccess.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestStorageBlockAccess.comp index e6b4fae212..d53a36e054 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestStorageBlockAccess.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestStorageBlockAccess.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTrinaryMinMaxFuncs.frag b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTrinaryMinMaxFuncs.frag index 10cef0d39a..0eb5c0be2b 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTrinaryMinMaxFuncs.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTrinaryMinMaxFuncs.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_int16: enable #extension GL_AMD_shader_trinary_minmax: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromBool.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromBool.comp index d74e6813a5..205e51485a 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromBool.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromBool.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromFloat.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromFloat.comp index 87bd3bf9e4..d48f05557d 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromFloat.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromFloat.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromInt.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromInt.comp index 2b1de2c22b..08243bbdbf 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromInt.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvFromInt.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToBool.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToBool.comp index 7d1293117f..34bf885244 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToBool.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToBool.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToFloat.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToFloat.comp index f7dafb043b..ecaf48bd0f 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToFloat.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToFloat.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToInt.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToInt.comp index 37e68bdd50..1ae078d732 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToInt.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestTypeConvToInt.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestUniformBlockAccess.comp b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestUniformBlockAccess.comp index 97e9c0c235..7d4ec1f1d3 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt16_TestUniformBlockAccess.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt16_TestUniformBlockAccess.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestArithmeticOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestArithmeticOp_lit.frag index 469817fcf2..301d319af0 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestArithmeticOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestArithmeticOp_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBitwiseOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBitwiseOp_lit.frag index fbf54e9695..fd23eef351 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBitwiseOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBitwiseOp_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBuiltInFunc_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBuiltInFunc_lit.frag index 113d755e13..67c1914c35 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBuiltInFunc_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestBuiltInFunc_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag index bb5734d317..a00102940f 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestRelationalOp_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -gfxip 10.3 -o - %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestShiftOp_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestShiftOp_lit.frag index 6b1ed0d2b6..19a131c19e 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestShiftOp_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestShiftOp_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestTypeCast_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestTypeCast_lit.frag index b7b1805ec0..ab153111ae 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt64_TestTypeCast_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt64_TestTypeCast_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_gpu_shader_int64 : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithInt8.frag b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithInt8.frag index 404cd334ee..37a42735c8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithInt8.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithInt8.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable layout(location = 0) out int8_t oColor; diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithUint8.frag b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithUint8.frag index 660370e78c..0e23aae624 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithUint8.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBasicArithUint8.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable layout(location = 0) out uint8_t oColor; diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBitwiseOp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBitwiseOp.comp index 6898a789e1..fed3cd3938 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBitwiseOp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBitwiseOp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBufLoadStore.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBufLoadStore.comp index d2102fc76f..37accf14c8 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBufLoadStore.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBufLoadStore.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncAbs.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncAbs.comp index edeb7e87bd..9e1e40ff05 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncAbs.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncAbs.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncMinMaxClamp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncMinMaxClamp.comp index 92885d8334..35cd2f7ba4 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncMinMaxClamp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncMinMaxClamp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncSign.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncSign.comp index 42ca06e6a3..704ffb7151 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncSign.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncSign.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncVectorCmp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncVectorCmp.comp index 1fd9688698..5dfc4d0ff0 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncVectorCmp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestBuiltInFuncVectorCmp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestFsInOut.frag b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestFsInOut.frag index 8184c83ed1..ad438e47b4 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestFsInOut.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestFsInOut.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestGsInOut.geom b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestGsInOut.geom index c9eff816da..f61a0c423d 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestGsInOut.geom +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestGsInOut.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestNegate.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestNegate.comp index c284168394..610c6c3ce2 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestNegate.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestNegate.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestScalarCmp.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestScalarCmp.comp index 8897259b52..87a0836c50 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestScalarCmp.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestScalarCmp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSharedVarLoadStore_lit.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSharedVarLoadStore_lit.comp index 72735bc323..91aeed05fc 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSharedVarLoadStore_lit.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSharedVarLoadStore_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestShiftOp_lit.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestShiftOp_lit.comp index 10001377f2..8ee211806c 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestShiftOp_lit.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestShiftOp_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types : enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSpecConst_lit.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSpecConst_lit.comp index d863dc945c..3aae64935e 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSpecConst_lit.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestSpecConst_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types_int64: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTcsInOut.tesc b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTcsInOut.tesc index 81fbbd69cc..b8a9b94ea6 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTcsInOut.tesc +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTcsInOut.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTesInOut.tese b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTesInOut.tese index 2d6805696a..1928e0cee1 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTesInOut.tese +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTesInOut.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTypeConvert_lit.comp b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTypeConvert_lit.comp index 99e0f51997..a3054cdd4e 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTypeConvert_lit.comp +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestTypeConvert_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core #extension GL_EXT_shader_explicit_arithmetic_types_int64: enable diff --git a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestVsInOut_lit.vert b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestVsInOut_lit.vert index b831ea6c4d..801e174df7 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderInt8_TestVsInOut_lit.vert +++ b/llpc/test/shaderdb/extensions/ExtShaderInt8_TestVsInOut_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable @@ -21,7 +28,7 @@ void main (void) ; SHADERTEST: call void @lgc.output.export.generic.i32.i32.i8(i32 0, i32 0, i8 %{{[0-9]*}}) ; SHADERTEST: call void @lgc.output.export.generic.i32.i32.v3i8(i32 1, i32 0, <3 x i8> %{{[0-9]*}}) ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results -; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}1, float %{{[0-9]*}}, float poison, float poison, float poison, i1 {{.*}}false, i1 {{.*}}false) +; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}1, float %{{[0-9]*}}, float undef, float undef, float undef, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag b/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag index 1e84697ff3..79ecf886d1 100644 --- a/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtShaderVote_TestGeneral_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_shader_group_vote: enable diff --git a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadBroadcast.frag b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadBroadcast.frag index e7b4f405ed..04739a697f 100644 --- a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadBroadcast.frag +++ b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadBroadcast.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapDiagonal.frag b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapDiagonal.frag index 753cd6ef20..f15c1b6331 100644 --- a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapDiagonal.frag +++ b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapDiagonal.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapVertical.frag b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapVertical.frag index 0cabc32750..090b6e57cd 100644 --- a/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapVertical.frag +++ b/llpc/test/shaderdb/extensions/ExtSubgroupQuad_TestSubgroupQuadSwapVertical.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/ExtTrinaryMinMax_TestGeneral_lit.frag b/llpc/test/shaderdb/extensions/ExtTrinaryMinMax_TestGeneral_lit.frag index 6114d7f0a1..b1e8be9c38 100644 --- a/llpc/test/shaderdb/extensions/ExtTrinaryMinMax_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/extensions/ExtTrinaryMinMax_TestGeneral_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_shader_trinary_minmax: enable diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TessGsDoubleOutput_lit.geom b/llpc/test/shaderdb/extensions/ExtXfb_TessGsDoubleOutput_lit.geom index e2690b4086..024bda835c 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TessGsDoubleOutput_lit.geom +++ b/llpc/test/shaderdb/extensions/ExtXfb_TessGsDoubleOutput_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 4) out; diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestGsFloatOutput_lit.geom b/llpc/test/shaderdb/extensions/ExtXfb_TestGsFloatOutput_lit.geom index 16d0c2fd3a..478ddc69f0 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestGsFloatOutput_lit.geom +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestGsFloatOutput_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 4) out; diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm b/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm index 73523f21da..a1d506232b 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestNoXfbExecutionMode.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -gfxip 10.3 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s ; SPIR-V diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestTesDoubleOutput_lit.tese b/llpc/test/shaderdb/extensions/ExtXfb_TestTesDoubleOutput_lit.tese index 9b6e9268ec..8c504fc9cd 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestTesDoubleOutput_lit.tese +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestTesDoubleOutput_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestTesFloatOutput_lit.tese b/llpc/test/shaderdb/extensions/ExtXfb_TestTesFloatOutput_lit.tese index 5415518acb..7b20d6bff6 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestTesFloatOutput_lit.tese +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestTesFloatOutput_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestVsDoubleOutput_lit.vert b/llpc/test/shaderdb/extensions/ExtXfb_TestVsDoubleOutput_lit.vert index a2f18f3e9d..95d22037c0 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestVsDoubleOutput_lit.vert +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestVsDoubleOutput_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in dvec4 fIn; layout(location = 0, xfb_buffer = 1, xfb_offset = 24) out dvec3 fOut1; diff --git a/llpc/test/shaderdb/extensions/ExtXfb_TestVsFloatOutput_lit.vert b/llpc/test/shaderdb/extensions/ExtXfb_TestVsFloatOutput_lit.vert index 047c985ed4..ab68a4c72a 100644 --- a/llpc/test/shaderdb/extensions/ExtXfb_TestVsFloatOutput_lit.vert +++ b/llpc/test/shaderdb/extensions/ExtXfb_TestVsFloatOutput_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 fIn; layout(location = 0, xfb_buffer = 1, xfb_offset = 24) out vec3 fOut1; diff --git a/llpc/test/shaderdb/extensions/ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag b/llpc/test/shaderdb/extensions/ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag index 0dea52a0e3..ade8511b91 100644 --- a/llpc/test/shaderdb/extensions/ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag +++ b/llpc/test/shaderdb/extensions/ObjFloat16_TestTrinaryMinMaxFuncs_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_AMD_gpu_shader_half_float: enable #extension GL_AMD_shader_trinary_minmax: enable diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsDouble.frag index 6d695a78fd..c85dd713e6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsFloat.frag index 280f430cf9..c20f5ab647 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsInt.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsInt.frag index 8b5e694103..affa73f369 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsInt.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsInt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsIvec4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsIvec4.frag index 796e61afa3..606b1a086d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsIvec4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsIvec4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsVec4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsVec4.frag index b179412b6f..2b2fccbfc9 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAbsVec4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAbsVec4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAcos.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAcos.frag index 5847345274..da26f92f6d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAcos.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAcos.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAcosFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAcosFloat.frag index 745c2cb438..4580791b6a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAcosFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAcosFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAcosh.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAcosh.frag index d1ee1b834e..a75a4db8c9 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAcosh.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAcosh.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAcoshFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAcoshFloat.frag index 7cc0e8d497..ad61d481a5 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAcoshFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAcoshFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAsin.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAsin.frag index cf4af2c94c..02e22aed9d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAsin.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAsin.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinFloat.frag index ef047f9f59..d8f7447235 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinh.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinh.frag index 0fa6dee812..eb51f386d6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinh.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinh.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinhFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinhFloat.frag index e6c11c6fe4..9f2df6f954 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAsinhFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAsinhFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan.frag index e16be090f9..8f4356c2e4 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2.frag index 82ed006e5c..2bed19c73e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2Float.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2Float.frag index 74b71d8c5b..db82936f29 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2Float.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtan2Float.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanFloat.frag index f51a0ab167..cb025090db 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanh.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanh.frag index 34653309bd..390e835b32 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanh.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanh.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanhFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanhFloat.frag index f6995df513..0b8cc5ba0c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestAtanhFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestAtanhFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilDouble.frag index 76d2a0475a..08da513477 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilFloat.frag index 03d858423b..d0a0375113 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilVec4Const.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilVec4Const.frag index c3f5a72ac9..a779152821 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCeilVec4Const.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCeilVec4Const.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestClampBasic.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestClampBasic.frag index 89394b34cc..ea7134b4ca 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestClampBasic.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestClampBasic.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestClampDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestClampDouble.frag index 4bb4c5b22a..bdf2352504 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestClampDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestClampDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestClampFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestClampFloat.frag index 991267d443..996598b228 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestClampFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestClampFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestClampInt.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestClampInt.frag index 2ed4c2d4f5..e284648afc 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestClampInt.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestClampInt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestClampUint.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestClampUint.frag index 27491a7f69..2ec3a76de4 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestClampUint.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestClampUint.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCos.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCos.frag index a82033ce03..6bd43a846d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCos.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCos.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCosVec4Const.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCosVec4Const.frag index 21b3ff4d2e..4f7df3e8b0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCosVec4Const.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCosVec4Const.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCosh.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCosh.frag index 5b94e50b1f..9a90569432 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCosh.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCosh.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCoshFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCoshFloat.frag index d9e44286aa..b7b0b81962 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCoshFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCoshFloat.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossDouble.frag index cdfc650987..628ec7b6db 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossFloat.frag index f6cc5589ec..10f21b7206 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossVec4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossVec4.frag index 4c374bdea7..2b07801987 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestCrossVec4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestCrossVec4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDegrees.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDegrees.frag index 2787f39640..e94ed3e6ce 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDegrees.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDegrees.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDegreesVec4Const.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDegreesVec4Const.frag index bbaf2e6a7d..59636ddf3e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDegreesVec4Const.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDegreesVec4Const.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantDmat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantDmat.frag index cfb2f3e64a..ccf3b387ed 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantDmat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantDmat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat.frag index 10024895fd..76faa0370c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat2.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat2.frag index 50e6a364e6..bec91342a6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat2.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat2.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in mat2 m0; layout(location = 0) out vec4 o_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat4.frag index e434989032..2db2b72ae8 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDeterminantMat4.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in mat4 m0; layout(location = 0) out vec4 o_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceBasic.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceBasic.frag index b15f3a7071..96cd7dddd4 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceBasic.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceBasic.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceDouble.frag index 2f0f8e64f5..ef045dfb3c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceFloat.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceFloat.frag index 3e449662d4..e26e8b7726 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceFloat.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceFloat.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceVec4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceVec4.frag index 4df29d5eed..e18e9b4c96 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceVec4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestDistanceVec4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestExp.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestExp.frag index 19c6f74f4d..a5e74ea84e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestExp.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestExp.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestExp2.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestExp2.frag index bb908aa641..7d693c4035 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestExp2.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestExp2.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestExp2Vec4Const.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestExp2Vec4Const.frag index 795a5fccf6..f1f60109fd 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestExp2Vec4Const.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestExp2Vec4Const.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestExpVec4Const.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestExpVec4Const.frag index 8e2a7eff18..0d804ac319 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestExpVec4Const.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestExpVec4Const.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForward.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForward.frag index 8ab2200df0..e9cc4aa8c1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForward.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForward.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardDouble.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardDouble.frag index d1f81de919..dd1752b7f6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardDouble.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardDouble.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardVec4.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardVec4.frag index 63b322d5ba..d26f90ad48 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardVec4.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFaceForwardVec4.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbInt.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbInt.frag index 98626601f4..f988df75e5 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbInt.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbInt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbUint.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbUint.frag index 0c31c9cb64..c7b2709417 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbUint.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindILsbUint.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindLsbInt.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindLsbInt.frag index 0276ef1bf2..6f342d5c84 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindLsbInt.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindLsbInt.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbInt.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbInt.frag index 5120f91a01..7979c1ce4a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbInt.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbInt.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbUint.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbUint.frag index ffb4f28594..9a09c7722f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbUint.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindMsbUint.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindSMsb.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindSMsb.frag index e119f2d60f..95c338fb6d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindSMsb.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindSMsb.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFindUMsb.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFindUMsb.frag index 0a9b74a49f..84582d89f8 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFindUMsb.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFindUMsb.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToInt_lit.frag index f113e1e01a..51ea2d9872 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToInt_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToUint_lit.frag index cdb5a4bef9..fc383cc5d3 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFloatBitsToUint_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorDouble_lit.frag index 3143c015c2..fc7cf77583 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorFloat_lit.frag index 074fa467a1..c966332113 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorVec4Const_lit.frag index 252ac7e59a..801d69a9ca 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFloorVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFloorVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag index 97fc801be0..719eecfb60 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s #version 450 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag index d47253d3fc..4a112561a8 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s #version 450 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaVec4Const_lit.frag index 26bf15e98d..a62466b2a9 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaVec4Const_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s #version 450 core diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFractDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFractDouble_lit.frag index ca88c09324..aa0ec80464 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFractDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFractDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFractFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFractFloat_lit.frag index 0579580da3..177092f633 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFractFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFractFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFractVec4Const-lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFractVec4Const-lit.frag index 828a76bf45..6e67b500e8 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFractVec4Const-lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFractVec4Const-lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpDouble_lit.frag index 3f556fecb6..af44edb7e1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpFloat_lit.frag index 35be3c7b2e..8d97072553 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructDouble_lit.frag index 4bb58528a7..d7123bd516 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructFloat_lit.frag index 489e15d34b..0869085e58 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructVec4_lit.frag index 9dd08ba611..e172f271db 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFrexpStructVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestIntBitsToFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestIntBitsToFloat_lit.frag index 7e3ac1eb30..cf58056855 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestIntBitsToFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestIntBitsToFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag index fa7d248ce3..3d532e73ad 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag index fca5cfb18c..02e722a88c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in centroid float f1_1; layout(location = 1) in vec4 f4_1; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag index 502778d411..486bc44b77 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in centroid float f1_1; layout(location = 1) in flat sample vec4 f4_1; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag index 724ea9dac0..03ebaca1c0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in centroid float f1_1; layout(location = 1) in flat sample vec4 f4_1; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag index ec2e380f7d..befa846112 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArray.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #define ITER 4 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag index 391b81a616..8ba38fd062 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DArrayInStruct.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #define ITER 5 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag index d2ea081063..78155c7227 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx1DStructArray.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #define ITER 3 struct Struct_2 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag index e9dce82017..17d5073a76 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStruct.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct Struct_2 { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag index 97da0627ac..01ca5d78d1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DArrayInStructInArray.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct Struct_2 { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag index bb1d764915..7525a8c4cf 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx2DStructArray.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct Struct_2 { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag index 88770c34c7..4168d46e11 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdx3DArray.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) out vec4 frag_color; layout(location = 0) in flat int x; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag index f635160b4f..c2e3e8787d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateDynIdxVector.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 frag_color; layout(location = 0) centroid in vec2 interp; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseMat4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseMat4_lit.frag index d288670c4b..751fe432db 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseMat4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseMat4_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in mat4 m0; layout(location = 0) out vec4 o_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtDouble_lit.frag index 8312c9bac8..ca649bfe20 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtFloat_lit.frag index af100368e1..53af84cb5b 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtVec4Const_lit.frag index 1c1e56988f..33da0cea62 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInverseSqrtVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpDouble_lit.frag index 55f7d480dd..eb45cc6b71 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpFloat_lit.frag index 99fa46a12e..69765ddb07 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpVec4_lit.frag index 24e2ed586c..9382686c82 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLdexpVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthBasic_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthBasic_lit.frag index 390112d1cc..9a417f16b0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthBasic_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 0) out vec4 color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthDouble_lit.frag index 3d5957173c..818ae9e812 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthFloat_lit.frag index 62c7e9ae60..b06d5a6c1a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthVec4_lit.frag index 820d538e92..5a096da070 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLengthVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLengthVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 10) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLog2Vec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLog2Vec4Const_lit.frag index 3b583b6e35..e01e632444 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLog2Vec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLog2Vec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLog2_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLog2_lit.frag index 6db3f55901..ec7fa44fdd 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLog2_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLog2_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLogVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLogVec4Const_lit.frag index 44dc16de65..eac1076bdd 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLogVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLogVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestLog_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestLog_lit.frag index 45fff82809..0695a96930 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestLog_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestLog_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseDmat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseDmat_lit.frag index 276eb3ed54..b832124680 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseDmat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseDmat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseMat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseMat_lit.frag index 2f30fd92a6..c1a59f5b2e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseMat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMatrixInverseMat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxBasic_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxBasic_lit.frag index bcce222d7b..ee4f9ebcf1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxBasic_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxDouble_lit.frag index 1412d9177f..2f88a692e9 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxFloat_lit.frag index e1e7896d4a..cb064f6688 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxInt_lit.frag index 5ebe39782c..8ddd487638 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxInt_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxUint_lit.frag index 0410e62bbe..97398851ba 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMaxUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMaxUint_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMinBasic_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMinBasic_lit.frag index cfae566d1c..f081536a18 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMinBasic_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMinBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMinDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMinDouble_lit.frag index 6706906b7b..d41a5fc39a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMinDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMinDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMinFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMinFloat_lit.frag index fbf368ff92..6ca04eb759 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMinFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMinFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMinInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMinInt_lit.frag index 9b87f06d55..2a11359fde 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMinInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMinInt_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMinUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMinUint_lit.frag index 27189a2b08..a49537b668 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMinUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMinUint_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixBasic_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixBasic_lit.frag index a6a34bdc0a..a904ca4909 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixBasic_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendDouble_lit.frag index dd5080b858..54cfb08de6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendFloat_lit.frag index bb0acc4489..fb0ce38783 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixLinearBlendFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag index db02c3d2dd..e2d795a18b 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag index 02dfc24143..efcf0d4fa6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag index f229d3c386..ec6c82004e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag index f65d21e5f0..3484895f52 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -emit-lgc -o - %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestModfDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestModfDouble_lit.frag index 70704643b5..a5f387b85b 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestModfDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestModfDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestModfFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestModfFloat_lit.frag index 9c50c76e09..e4389b4208 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestModfFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestModfFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestModfVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestModfVec4_lit.frag index dffb2468ff..e63d31f12c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestModfVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestModfVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestNonSemanticInfo.spvasm b/llpc/test/shaderdb/extensions/OpExtInst_TestNonSemanticInfo.spvasm index 08ed996e72..9e7354d193 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestNonSemanticInfo.spvasm +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestNonSemanticInfo.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeDouble_lit.frag index 5ad7835210..75437f11b1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeFloat_lit.frag index 56ed505023..81fbaac855 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeVec4_lit.frag index 7622cd89bc..9b38fcd75e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestNormalizeVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackDouble2x32_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackDouble2x32_lit.frag index 14627afba5..2f9408f90f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackDouble2x32_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackDouble2x32_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackHalf2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackHalf2x16_lit.frag index 0d90c543cc..24da9a49cf 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackHalf2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackHalf2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm2x16_lit.frag index 327662a2da..7b0b509b18 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm4x8_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm4x8_lit.frag index 67c41495c8..51ff1a4a2f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm4x8_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackSnorm4x8_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm2x16_lit.frag index bd5e86d0f6..780756a08e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm4x8_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm4x8_lit.frag index 7cae4837e8..9fcb45bee7 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm4x8_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPackUnorm4x8_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag index c6ce6a8b0a..88964f5d7a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float fIn; layout(location = 0) out float fOut; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPowVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPowVec4Const_lit.frag index 0c3b1e577b..a94698e183 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPowVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPowVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPow_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPow_lit.frag index 336ff43465..a897b55842 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPow_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPow_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRadiansVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRadiansVec4Const_lit.frag index 601bc0c2bc..e2f1fa8b65 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRadiansVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRadiansVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRadians_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRadians_lit.frag index f7ede7f6da..13be28032c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRadians_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRadians_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectDouble_lit.frag index 61cd90f042..8ef85e9ad0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectFloat_lit.frag index 693b4fdf00..308ff40b9f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectVec4_lit.frag index 7b7a5e0996..acba443696 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestReflectVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestReflectVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractDouble_lit.frag index 585d59bfc6..c8d67feae3 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractFloat_lit.frag index 286a55bdf5..995de57155 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractVec4_lit.frag index d4eb9d8871..c94999debe 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRefractVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRefractVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundDouble_lit.frag index 9b0445b3d1..8b7684b02f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenDouble_lit.frag index 0d7c35a23e..1c1dc03de5 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenFloat_lit.frag index 6155f6c361..44db2d10ef 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenVec4_lit.frag index 099aee1d93..bcdc57b269 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundEvenVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundFloat_lit.frag index 174cbbf48b..1457f17632 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundVec4_lit.frag index 9a0eee3fd0..73370c0fc7 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestRoundVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestRoundVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSignDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSignDouble_lit.frag index 960b94c1db..e999e44043 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSignDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSignDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSignFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSignFloat_lit.frag index e886c3abed..8238a699b6 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSignFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSignFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSignInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSignInt_lit.frag index 8e05c8b5fc..15d87eef0c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSignInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSignInt_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSignIvec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSignIvec4_lit.frag index 2609f36521..ad6a9d53f3 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSignIvec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSignIvec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a0; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSignVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSignVec4_lit.frag index b44ed1eef0..a0c7719ef1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSignVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSignVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSinVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSinVec4Const_lit.frag index 648fbf176c..6288bd2261 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSinVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSinVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSin_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSin_lit.frag index 9cb059d33f..1f7c3a67f0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSin_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSin_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSinhFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSinhFloat_lit.frag index 01eb72db96..467d490a3f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSinhFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSinhFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSinh_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSinh_lit.frag index d03d2dce80..c6b2deb68c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSinh_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSinh_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepDouble_lit.frag index d4b2c9ebd9..1d0a3c0446 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepFloat_lit.frag index 3f18781cbc..ec1b60d98e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepVec4Const_lit.frag index d4866f588f..bbcdae09b1 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSmoothStepVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtDouble_lit.frag index a1ddd863db..bed11dae03 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtFloat_lit.frag index 2ea53fb454..e6ccdd9531 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtVec4Const_lit.frag index 4704629530..ea5365375c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestSqrtVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestStepDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestStepDouble_lit.frag index a26fb38f4f..f751cf2d1a 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestStepDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestStepDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestStepFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestStepFloat_lit.frag index 5c3a5d7615..4a3cddb268 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestStepFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestStepFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestStepVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestStepVec4Const_lit.frag index c6fe109e66..52b74c0594 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestStepVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestStepVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 1) in vec4 b; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTanVec4Const_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTanVec4Const_lit.frag index 64de8f3491..67e956d5e8 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTanVec4Const_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTanVec4Const_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTan_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTan_lit.frag index c9cf98d5ac..7bad534492 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTan_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTan_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag index 0752ca2e22..aacefbc2d7 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 a; layout(location = 0) out vec4 frag_color; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag index 3ea57278da..62eca6420d 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncDouble_lit.frag index 6df67deafc..f0a33d1002 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncFloat_lit.frag index a3660dcc4f..f62b98ce29 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncVec4_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncVec4_lit.frag index d40a950174..59fc8b6ed3 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTruncVec4_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTruncVec4_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in float a; layout(location = 1) in vec4 b0; diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUintBitsToFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUintBitsToFloat_lit.frag index 9164cbfbc2..64146144b4 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUintBitsToFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUintBitsToFloat_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackDouble2x32_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackDouble2x32_lit.frag index 0a58d551f1..436fa344da 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackDouble2x32_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackDouble2x32_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackHalf2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackHalf2x16_lit.frag index 03a3156de0..d7aa3e323e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackHalf2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackHalf2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm2x16_lit.frag index 5e8a73ad96..a8cd1da67f 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm4x8_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm4x8_lit.frag index f95d7ba755..94853f30bf 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm4x8_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackSnorm4x8_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm2x16_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm2x16_lit.frag index 4fb9d34c4d..601914f2bf 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm2x16_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm2x16_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm4x8_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm4x8_lit.frag index cd382a542e..0ce8e11ea0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm4x8_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestUnpackUnorm4x8_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestinverseMat2-lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestinverseMat2-lit.frag index 484f275108..286685cb05 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestinverseMat2-lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestinverseMat2-lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in mat2 m0; layout(location = 0) out vec4 o_color; diff --git a/llpc/test/shaderdb/extensions/PipelineVsFs_TestAlpha2Coverage.pipe b/llpc/test/shaderdb/extensions/PipelineVsFs_TestAlpha2Coverage.pipe index d8b488c281..32b4e61980 100644 --- a/llpc/test/shaderdb/extensions/PipelineVsFs_TestAlpha2Coverage.pipe +++ b/llpc/test/shaderdb/extensions/PipelineVsFs_TestAlpha2Coverage.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the value of DB_SHADER_CONTROL is set correctly. ; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/extensions/PipelineVsFs_ViewIndexWithMultiViewDisabled.pipe b/llpc/test/shaderdb/extensions/PipelineVsFs_ViewIndexWithMultiViewDisabled.pipe index 5366b21879..f0c5b7da72 100644 --- a/llpc/test/shaderdb/extensions/PipelineVsFs_ViewIndexWithMultiViewDisabled.pipe +++ b/llpc/test/shaderdb/extensions/PipelineVsFs_ViewIndexWithMultiViewDisabled.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the view index folds to 0 when multiview is not enabled. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ComputeBlockPressure.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ComputeBlockPressure.spvasm index 611ac4e999..b6d6c74f3f 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ComputeBlockPressure.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ComputeBlockPressure.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Summary: assertion failure in llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp. ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_FindKillUseAfterPoison.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_FindKillUseAfterPoison.spvasm index 76c263bfc7..ae23b061fd 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_FindKillUseAfterPoison.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_FindKillUseAfterPoison.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Summary: AddressSanitizer finds use-after-poison llvm-project/llvm/include/llvm/CodeGen/MachineInstr.h:281:43 in getParent ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ISelAlignment.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ISelAlignment.spvasm index e29a719d33..826914307f 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ISelAlignment.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_ISelAlignment.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainIndexConvertedFromFloat.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainIndexConvertedFromFloat.spvasm index c13fcfc56c..c6053b203c 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainIndexConvertedFromFloat.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainIndexConvertedFromFloat.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainUsingInputPointer.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainUsingInputPointer.spvasm index a55e81c2b9..5da6c60c7a 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainUsingInputPointer.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestAccessChainUsingInputPointer.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestBVec4.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestBVec4.spvasm index dba645ab48..58bc49e861 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestBVec4.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestBVec4.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestConditionalsAndOpKill.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestConditionalsAndOpKill.spvasm index e80d70d94b..b9d4f632cc 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestConditionalsAndOpKill.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestConditionalsAndOpKill.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestControlFlowInFunction.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestControlFlowInFunction.spvasm index 4f7d8d3158..7eef21f472 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestControlFlowInFunction.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestControlFlowInFunction.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopDeepIfLoop.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopDeepIfLoop.spvasm index 32acd85096..756d402220 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopDeepIfLoop.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopDeepIfLoop.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopNestedIfs.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopNestedIfs.spvasm index 433e58c0db..2fdb78303b 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopNestedIfs.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopNestedIfs.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopsIfsContinuesCall.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopsIfsContinuesCall.spvasm index a31bf0a1b4..7fbec7d61a 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopsIfsContinuesCall.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestLoopsIfsContinuesCall.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestMaxMixConditionalDiscard.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestMaxMixConditionalDiscard.spvasm index c03d53a0d9..4102f6ae62 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestMaxMixConditionalDiscard.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestMaxMixConditionalDiscard.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFGlColor.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFGlColor.spvasm index fd332ab08a..bebf87daf4 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFGlColor.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFGlColor.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFTempColor.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFTempColor.spvasm index 2420f5d487..901512a4ea 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFTempColor.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestModFTempColor.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObject.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObject.spvasm index 820c02d7af..c26962a33f 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObject.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObject.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObjectFromAccessChain.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObjectFromAccessChain.spvasm index 72c571bb00..1f84e3ab2a 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObjectFromAccessChain.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpCopyObjectFromAccessChain.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpIAddCarry.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpIAddCarry.spvasm index 677ee77731..4969e20314 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpIAddCarry.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpIAddCarry.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm index abc7f73410..6587902004 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpPhisAtLoopHeader.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpSNegate.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpSNegate.spvasm index d8cd8d9df7..84ae403c56 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpSNegate.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestOpSNegate.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSimilarNestedIfs.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSimilarNestedIfs.spvasm index 8a93f14de1..7573db53a4 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSimilarNestedIfs.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSimilarNestedIfs.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSwitch.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSwitch.spvasm index 549d6bffa2..505b28579a 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSwitch.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestSwitch.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsMatrix.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsMatrix.spvasm index 5048d4a234..ba58e66f64 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsMatrix.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsMatrix.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsSetStruct.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsSetStruct.spvasm index 2835b0d77c..55b732e544 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsSetStruct.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsSetStruct.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsWithBreak.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsWithBreak.spvasm index 44b3d58e82..3513903ab8 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsWithBreak.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_TestTwoLoopsWithBreak.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_UseNotJointlyDominatedByDefs.spvasm b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_UseNotJointlyDominatedByDefs.spvasm index 6ec68dd45f..5dd7ffb04d 100644 --- a/llpc/test/shaderdb/fuzzer/GraphicsFuzz_UseNotJointlyDominatedByDefs.spvasm +++ b/llpc/test/shaderdb/fuzzer/GraphicsFuzz_UseNotJointlyDominatedByDefs.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Summary: fails with "Use of $noreg does not have a corresponding definition on every path". ; BEGIN_SHADERTEST ; RUN: amdllpc --verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/AggressiveInvariantLoads.pipe b/llpc/test/shaderdb/general/AggressiveInvariantLoads.pipe index a46c998b67..a42870d591 100644 --- a/llpc/test/shaderdb/general/AggressiveInvariantLoads.pipe +++ b/llpc/test/shaderdb/general/AggressiveInvariantLoads.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/general/CallInstAsUserOfGlobalVariable.spvasm b/llpc/test/shaderdb/general/CallInstAsUserOfGlobalVariable.spvasm index 29771890c8..aa8ab59658 100644 --- a/llpc/test/shaderdb/general/CallInstAsUserOfGlobalVariable.spvasm +++ b/llpc/test/shaderdb/general/CallInstAsUserOfGlobalVariable.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test checks if lowerGlobal is handling properly case with removed zero-index GEPs. ; @_ug_input23 = external addrspace(7) global [2 x <{ [0 x float] }>], !spirv.Resource !2, !spirv.Block !1 diff --git a/llpc/test/shaderdb/general/CantOptimizePointSizeWrite.pipe b/llpc/test/shaderdb/general/CantOptimizePointSizeWrite.pipe index bf2520339d..a76fdcb187 100644 --- a/llpc/test/shaderdb/general/CantOptimizePointSizeWrite.pipe +++ b/llpc/test/shaderdb/general/CantOptimizePointSizeWrite.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to verify the optimization of PointSize write cannot be performed. When the write value of PointSize is ; not uniformly 1.0, we should disable the optimization. 
In such case, PointSize normally takes different values. The ; missing writes of 1.0 is not semantically correct. diff --git a/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport.spvasm b/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport.spvasm index a191df60db..0053096fda 100644 --- a/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport.spvasm +++ b/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; SPIR-V ; Version: 1.6 ; Generator: Khronos Glslang Reference Front End; 11 diff --git a/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport4.pipe b/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport4.pipe index e71c1b5a7f..54291a3d51 100644 --- a/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport4.pipe +++ b/llpc/test/shaderdb/general/CbShaderMaskWithDummyExport4.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + [Version] version = 75 diff --git a/llpc/test/shaderdb/general/CoherentArray.frag b/llpc/test/shaderdb/general/CoherentArray.frag index 472e487115..99794a460c 100644 --- a/llpc/test/shaderdb/general/CoherentArray.frag +++ b/llpc/test/shaderdb/general/CoherentArray.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(set = 1, binding = 0) buffer coherent b { vec4 v[3]; diff --git a/llpc/test/shaderdb/general/CoherentVector.frag b/llpc/test/shaderdb/general/CoherentVector.frag index 048e33ceee..8a2bc6e71f 100644 --- a/llpc/test/shaderdb/general/CoherentVector.frag +++ b/llpc/test/shaderdb/general/CoherentVector.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(set = 1, binding = 0) coherent buffer b { vec4 v; diff --git a/llpc/test/shaderdb/general/CsPipelineDumpTest.pipe b/llpc/test/shaderdb/general/CsPipelineDumpTest.pipe index 1eda20dee2..e39dc012ac 100644 --- a/llpc/test/shaderdb/general/CsPipelineDumpTest.pipe +++ b/llpc/test/shaderdb/general/CsPipelineDumpTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that a compute pipeline dump can be correctly recompiled. ; Create a fresh directory for pipeline dump files. diff --git a/llpc/test/shaderdb/general/CsTimerProfileTest.pipe b/llpc/test/shaderdb/general/CsTimerProfileTest.pipe index cbbcb03c43..cb2b51a253 100644 --- a/llpc/test/shaderdb/general/CsTimerProfileTest.pipe +++ b/llpc/test/shaderdb/general/CsTimerProfileTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that timer profile and pipeline info printing works for pipe inputs. ; RUN: amdllpc -v %gfxip %s --enable-timer-profile >%t.stdout 2>%t.stderr \ diff --git a/llpc/test/shaderdb/general/DisableInvariantLoads.pipe b/llpc/test/shaderdb/general/DisableInvariantLoads.pipe index 6d93eb227a..3abb3862b8 100644 --- a/llpc/test/shaderdb/general/DisableInvariantLoads.pipe +++ b/llpc/test/shaderdb/general/DisableInvariantLoads.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/general/DiscardToDemoteTransformations.frag b/llpc/test/shaderdb/general/DiscardToDemoteTransformations.frag index 8020c49246..efa8283450 100644 --- a/llpc/test/shaderdb/general/DiscardToDemoteTransformations.frag +++ b/llpc/test/shaderdb/general/DiscardToDemoteTransformations.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that amdllpc enables discard-to-demote transforms automatically and that they // can be disabled on demand. This should affect the generated code and cache hash. diff --git a/llpc/test/shaderdb/general/DiscardToDemoteTransformationsNotRequired.frag b/llpc/test/shaderdb/general/DiscardToDemoteTransformationsNotRequired.frag index 2e9baeb9b6..dffa7c75b4 100644 --- a/llpc/test/shaderdb/general/DiscardToDemoteTransformationsNotRequired.frag +++ b/llpc/test/shaderdb/general/DiscardToDemoteTransformationsNotRequired.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that amdllpc does not apply discard-to-demote attribute to legal discards. 
// RUN: amdllpc %gfxip --v %s |\ diff --git a/llpc/test/shaderdb/general/ImgDescLoad.comp b/llpc/test/shaderdb/general/ImgDescLoad.comp index 15166a8cf5..46cf965d22 100644 --- a/llpc/test/shaderdb/general/ImgDescLoad.comp +++ b/llpc/test/shaderdb/general/ImgDescLoad.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // The test checks whether image/sampler descriptor loads are marked with invariant.load metadata. 
// BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/MeshOutputsToAllocas.mesh b/llpc/test/shaderdb/general/MeshOutputsToAllocas.mesh new file mode 100644 index 0000000000..9a6980f3a5 --- /dev/null +++ b/llpc/test/shaderdb/general/MeshOutputsToAllocas.mesh @@ -0,0 +1,61 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + +// Test to check the optimization of storing mesh outputs to allocas without LDS involvement. +// In this test, the mesh shader is linearly dispatched with the dimensions Y=1 and Z=1. The mesh +// outputs could be stored to allocas and read back from those allocas before primitive/vertex export. +// Such stores can largely reduce the LDS consumption and avoid unnecessary LDS reads/writes, which +// improves the performance of mesh shader noticeably. + +// RUN: amdllpc -gfxip=11 -v %s | FileCheck -check-prefix=SHADERTEST %s + +// SHADERTEST-LABEL: {{^// LLPC}} mesh shader LDS region info (in dwords) and general info +// SHADERTEST: Internal Mesh LDS = 3 dwords +// SHADERTEST: OutputsToAllocas = true + +// SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info +// SHADERTEST: call void @llvm.amdgcn.exp.row.i32(i32 20, i32 1, i32 2098176, i32 poison, i32 poison, i32 poison, i1 true +// SHADERTEST: call void @llvm.amdgcn.exp.row.f32(i32 12, i32 15, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i1 true + +// SHADERTEST-LABEL: {{^// LLPC}} final ELF info +// SHADERTEST: v_mov_b32_e32 v1, 0x200400 +// SHADERTEST: exp prim v1, off, off, off done row_en +// SHADERTEST: v_mov_b32_e32 v0, 0 +// SHADERTEST: exp pos0 v0, v0, v0, v0 done row_en +// SHADERTEST: .lds_size: 0x0000000000000200 + +#version 450 + +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in; +layout(max_vertices = 128, max_primitives = 128, triangles) out; + +struct UniformData +{ + ivec4 zero4u; + ivec4 vertexCount; + ivec4 primCount; +}; + +layout(set = 0, binding 
= 0, std140) uniform UniformDataBlock +{ + UniformData g_uniforms; +} _30; + +void main() +{ + uint groupID = gl_WorkGroupID.x; + uint groupThreadID = gl_LocalInvocationID.x; + uint meshletIndex = groupID; + uint vertexCount = uint(_30.g_uniforms.vertexCount[meshletIndex % 4u] + 0); + uint primCount = uint(_30.g_uniforms.primCount[meshletIndex % 4u] + 0); + SetMeshOutputsEXT(vertexCount, primCount); + + if (groupThreadID < vertexCount) + { + gl_MeshVerticesEXT[groupThreadID].gl_Position = vec4(0.0); + } + if (groupThreadID < primCount) + { + gl_PrimitiveTriangleIndicesEXT[groupThreadID] = uvec3(0u, 1u, 2u); + } +} diff --git a/llpc/test/shaderdb/general/MissingResourceNodeTest.pipe b/llpc/test/shaderdb/general/MissingResourceNodeTest.pipe index 994ea35524..7f1b931fb5 100644 --- a/llpc/test/shaderdb/general/MissingResourceNodeTest.pipe +++ b/llpc/test/shaderdb/general/MissingResourceNodeTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that a pipeline compilation fails when resource nodes are missing ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/NggInCullingMode.pipe b/llpc/test/shaderdb/general/NggInCullingMode.pipe index d2118773ac..acac01a288 100644 --- a/llpc/test/shaderdb/general/NggInCullingMode.pipe +++ b/llpc/test/shaderdb/general/NggInCullingMode.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to verify NGG culling mode is enabled as expected. ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/NsaThreshold.pipe b/llpc/test/shaderdb/general/NsaThreshold.pipe index 0e2803845d..21cceb6aaa 100644 --- a/llpc/test/shaderdb/general/NsaThreshold.pipe +++ b/llpc/test/shaderdb/general/NsaThreshold.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v -gfxip=10.1 %s | FileCheck -check-prefix=CHECK %s ; CHECK-NOT: {{^}}attributes #0 {{.*}} "amdgpu-nsa-threshold" diff --git a/llpc/test/shaderdb/general/OptimizePointSizeWrite.pipe b/llpc/test/shaderdb/general/OptimizePointSizeWrite.pipe index f0536cf8ac..e793b12139 100644 --- a/llpc/test/shaderdb/general/OptimizePointSizeWrite.pipe +++ b/llpc/test/shaderdb/general/OptimizePointSizeWrite.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to verify the optimization of PointSize write. When the write value of PointSize is 1.0, we can remove ; it safely in Vulkan. The optimization is controlled by the pipeline option optimizePointSizeWrite. 
diff --git a/llpc/test/shaderdb/general/OutputPrimitiveTest.geom b/llpc/test/shaderdb/general/OutputPrimitiveTest.geom index 8c193bc038..37d4a79500 100644 --- a/llpc/test/shaderdb/general/OutputPrimitiveTest.geom +++ b/llpc/test/shaderdb/general/OutputPrimitiveTest.geom @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + // This test case checks whether the emit instruction and the primitive type are well-handled when there is no output // of the geometry shader. // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe b/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe index 522ceda4e1..c0fc3af557 100644 --- a/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_DebugBreak_intrinsic.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -v -gfxip 10.1 %s | FileCheck -check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe index 6344c54e97..488222fec7 100644 --- a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -gfxip 10.1 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/general/PipelineCs_ForceMemoryBarrierScope.pipe b/llpc/test/shaderdb/general/PipelineCs_ForceMemoryBarrierScope.pipe index 7f45c7ff25..17e940eb85 100644 --- a/llpc/test/shaderdb/general/PipelineCs_ForceMemoryBarrierScope.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_ForceMemoryBarrierScope.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe b/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe index 94d7ce2791..860a32e09f 100644 --- a/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals ; RUN: amdllpc -o - -gfxip=10.3 --emit-llvm %s | FileCheck -check-prefixes=CHECK %s ; Check that ldsSpillLimitDwords shader option gets propagated to the backend as amdgpu-lds-spill-limit-dwords attribute. diff --git a/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe b/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe index 31f471f91b..c3d23d53a7 100644 --- a/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that LLPC can handle multiple inline buffer in the root table. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize16X16X1.pipe b/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize16X16X1.pipe index 9afb3c012e..cd75cd4afd 100644 --- a/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize16X16X1.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize16X16X1.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that overrideThreadGroupSize shader options are set correctly on PAL metadata. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize8X8X1.pipe b/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize8X8X1.pipe index e4f7513075..84d78fe986 100644 --- a/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize8X8X1.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_OverrideShaderThreadGroupSize8X8X1.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that overrideThreadGroupSize shader options are set correctly on PAL metadata. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_TestConstImmediateStore.pipe b/llpc/test/shaderdb/general/PipelineCs_TestConstImmediateStore.pipe index 7852dc9fb1..4098bd4736 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestConstImmediateStore.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestConstImmediateStore.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill.pipe b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill.pipe index 83d46ca85e..02e7faaf0c 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe index e4c14f70d8..585df2cb22 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestDynDescNoSpill_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/general/PipelineCs_TestDynDescSpill.pipe b/llpc/test/shaderdb/general/PipelineCs_TestDynDescSpill.pipe index 2aecfbb99e..16bb006868 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestDynDescSpill.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestDynDescSpill.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased.pipe b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased.pipe index f6234ff3f6..0a4f5a3a72 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe index 12b581f47c..40d9b08e1b 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskBased_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskOnly.pipe b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskOnly.pipe index 6f79377805..02d2650b9b 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskOnly.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestFetch2DMSFmaskOnly.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect.pipe b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect.pipe index 0548bc4086..967a0be4a6 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect_lit.pipe index 8b074f0c55..e2642bb591 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstDirect_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-load-scalarizer=false -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; REQUIRES: do-not-run-me diff --git a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect.pipe b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect.pipe index 7175e990dc..a5647800b6 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect_lit.pipe index fbaefb4c00..e4772ef97b 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestInlineConstIndirect_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint.pipe b/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint.pipe index 1a91627837..c6c44a74bf 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint_lit.pipe b/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint_lit.pipe index f3f7d6342b..80f7a1f701 100644 --- a/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint_lit.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_TestMultiEntryPoint_lit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineGsTess_TestInOutPacking.pipe b/llpc/test/shaderdb/general/PipelineGsTess_TestInOutPacking.pipe index 5a8c212ed1..e91ecc2adb 100644 --- a/llpc/test/shaderdb/general/PipelineGsTess_TestInOutPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineGsTess_TestInOutPacking.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} transform feedback export info (geometry shader) diff --git a/llpc/test/shaderdb/general/PipelineGs_TestViewIndexAndLayer.pipe b/llpc/test/shaderdb/general/PipelineGs_TestViewIndexAndLayer.pipe index dabfd962a9..c8e3dc800b 100644 --- a/llpc/test/shaderdb/general/PipelineGs_TestViewIndexAndLayer.pipe +++ b/llpc/test/shaderdb/general/PipelineGs_TestViewIndexAndLayer.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Test gl_ViewIndex when multi-view is disabled ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe index c0f7a2fdba..401e9f8d07 100644 --- a/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe +++ b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to check output packing of mesh shader in LDS space. If we treat each location ; of a mesh shader output as vec4, the LDS usage will exceed HW limitation. But if we pack ; those outputs tightly in LDS space, the LDS usage will be reduced greatly. 
@@ -13,8 +21,8 @@ ; SHADERTEST-LABEL: // LLPC mesh shader LDS region info (in dwords) and general info -; SHADERTEST-LABEL: Per-vertex Output : offset = 0x0083, size = 0x0C80 -; SHADERTEST-LABEL: Per-primitive Output : offset = 0x0D03, size = 0x0100 +; SHADERTEST-LABEL: Vertex Outputs : offset = 0x0083, size = 0x0C80 +; SHADERTEST-LABEL: Primitive Outputs : offset = 0x0D03, size = 0x0100 ; SHADERTEST-LABEL: Vertex Outputs Layout (stride = 25, exports = 25): ; SHADERTEST-LABEL: -- location = 0, components = 1, offset = 0 @@ -56,7 +64,7 @@ #extension GL_EXT_mesh_shader: enable #extension GL_EXT_shader_explicit_arithmetic_types: enable -layout(local_size_x=128, local_size_y=1, local_size_z=1) in; +layout(local_size_x=128, local_size_y=2, local_size_z=1) in; layout(points, max_vertices = 128, max_primitives = 128) out; layout(location = 0) out float vertex[][25]; @@ -69,12 +77,12 @@ void main() { SetMeshOutputsEXT(128, 128); for (int i = 0; i < 25; i++) - vertex[gl_LocalInvocationIndex][i] = float(i / 25.0); + vertex[gl_LocalInvocationID.x][i] = float(i / 25.0); - primitive[gl_LocalInvocationIndex][0] = 0.0; - primitive[gl_LocalInvocationIndex][1] = 0.5; + primitive[gl_LocalInvocationID.x][0] = 0.0; + primitive[gl_LocalInvocationID.x][1] = 0.5; - sharedVar[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); + sharedVar[gl_LocalInvocationID.x] = float(gl_LocalInvocationID.x); } [MeshInfo] diff --git a/llpc/test/shaderdb/general/PipelineMesh_TestMismatchMeshInOutWithAllocas.pipe b/llpc/test/shaderdb/general/PipelineMesh_TestMismatchMeshInOutWithAllocas.pipe new file mode 100644 index 0000000000..02d2b30ba8 --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineMesh_TestMismatchMeshInOutWithAllocas.pipe @@ -0,0 +1,96 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; BEGIN_SHADERTEST +; RUN: amdllpc -v -gfxip=11.0 %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST: AMDLLPC SUCCESS +; END_SHADERTEST + +[Version] +version = 75 + +[MeshGlsl] +#version 460 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 3, local_size_y = 1, local_size_z = 1) in; +layout(max_vertices = 3, max_primitives = 3, triangles) out; + +out gl_MeshPerVertexEXT +{ + invariant vec4 gl_Position; +} gl_MeshVerticesEXT[]; + +layout(location = 1) out vec3 ARG[3]; +layout(location = 2) flat out uvec4 ARG_2[3]; +layout(location = 4) perprimitiveEXT out vec2 ARG_1[3]; + +void main() +{ + SetMeshOutputsEXT(3u, 3u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.x = 1.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.y = 1.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.z = 0.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position.w = 1.0; + ARG[gl_LocalInvocationIndex].x = 1.0; + ARG[gl_LocalInvocationIndex].y = 2.0; + ARG[gl_LocalInvocationIndex].z = 3.0; + ARG_2[gl_LocalInvocationIndex].x = 6u; + ARG_2[gl_LocalInvocationIndex].y = 7u; + ARG_2[gl_LocalInvocationIndex].z = 8u; + ARG_2[gl_LocalInvocationIndex].w = 9u; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_LocalInvocationIndex); + ARG_1[gl_LocalInvocationIndex].x = 4.0; + ARG_1[gl_LocalInvocationIndex].y = 5.0; +} + +[MeshInfo] +entryPoint = main +options.clientHash = 0xB4FC8378D8AD8BCF, 0xF5E42AB8B0336FB7 + +[FsGlsl] +#version 460 + +layout(location = 1) in vec3 ARG; +layout(location = 0) out vec4 SV_Target; +layout(location = 1) out vec2 SV_Target_1; +layout(location = 2) out uvec4 SV_Target_2; + +void main() +{ + SV_Target.x = ARG.x; + SV_Target.y = ARG.y; + SV_Target.z = ARG.z; + SV_Target.w = 0.0; + SV_Target_1.x = 
0.0; + SV_Target_1.y = 0.0; + SV_Target_2.x = uint(gl_PrimitiveID); + SV_Target_2.y = 0u; + SV_Target_2.z = 0u; + SV_Target_2.w = 0u; +} + +[FsInfo] +entryPoint = main +options.clientHash = 0x47EB651246D94E97, 0xC726082853873C1D + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST +provokingVertexMode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT +colorBuffer[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 0 +colorBuffer[1].format = VK_FORMAT_R32G32_SFLOAT +colorBuffer[1].channelWriteMask = 0 +colorBuffer[1].blendEnable = 0 +colorBuffer[1].blendSrcAlphaToColor = 0 +colorBuffer[2].format = VK_FORMAT_R32G32B32A32_UINT +colorBuffer[2].channelWriteMask = 0 +colorBuffer[2].blendEnable = 0 +colorBuffer[2].blendSrcAlphaToColor = 0 + diff --git a/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe b/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe index 87b36861ba..5c9cc480ad 100644 --- a/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe +++ b/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check lgc.rt.trace.ray dialect is being generated. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe b/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe index 239df98502..a8ce2a6171 100644 --- a/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe +++ b/llpc/test/shaderdb/general/PipelineTaskMesh_LdsVariables.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v -gfxip 11.0 -o /dev/null %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_OutputComponentNotReadByNextStage.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_OutputComponentNotReadByNextStage.pipe index 5548f45a5c..92b00ad110 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_OutputComponentNotReadByNextStage.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_OutputComponentNotReadByNextStage.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to check location/component mapping of a TCS output which is not used by TES but is treated as active ; and is kept since it is read by TCS itself. 
diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemAccess.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemAccess.pipe index 9f673caf71..49061028d2 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemAccess.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemAccess.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemDynAccess.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemDynAccess.pipe index 092d155e27..961b2a6cec 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemDynAccess.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapArrayElemDynAccess.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadBuiltInOutput.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadBuiltInOutput.pipe index e3537bdc71..1bec7c83d5 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadBuiltInOutput.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadBuiltInOutput.pipe @@ -1,8 +1,16 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} HW tessellation configurations -; SHADERTEST: PatchConstants = 0, Size = [0, 0] dwords +; SHADERTEST: PatchConstants = [0, 0], Size = [0, 0] dwords ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results ; SHADERTEST: call void @llvm.amdgcn.raw.buffer.store.v4f32 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadGenericOutput.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadGenericOutput.pipe index 84b7c5aaf7..112115b52e 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadGenericOutput.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapLoadGenericOutput.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. 
All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompAccess.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompAccess.pipe index cd69b34011..3c5d75517b 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompAccess.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompAccess.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompDynAccess.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompDynAccess.pipe index f630376e73..d42501d5b9 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompDynAccess.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestLocMapVecCompDynAccess.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForIsoline.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForIsoline.pipe index fcf5361a24..7c67b8b67e 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForIsoline.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForIsoline.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForQuad.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForQuad.pipe index c165a8c670..025c8e0c49 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForQuad.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForQuad.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForTriangle.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForTriangle.pipe index f11f94b134..ae17af6856 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForTriangle.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelDynIndexForTriangle.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForIsoline.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForIsoline.pipe index eeaafe5a3b..9d8c903f29 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForIsoline.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForIsoline.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForQuad.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForQuad.pipe index dc34f0f91e..3807425f2c 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForQuad.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForQuad.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForTriangle.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForTriangle.pipe index cb65fdc136..4909a0675e 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForTriangle.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelElemForTriangle.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForIsoline.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForIsoline.pipe index 7e41737443..156e7009aa 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForIsoline.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForIsoline.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForQuad.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForQuad.pipe index d968a3d774..ee89980c1d 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForQuad.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForQuad.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForTriangle.pipe b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForTriangle.pipe index 67b762b955..b279548482 100644 --- a/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForTriangle.pipe +++ b/llpc/test/shaderdb/general/PipelineTcsTes_TestTessLevelForTriangle.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe index c48fb49d82..368dc8781b 100644 --- a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST_PP0 %s ; SHADERTEST_PP0-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe index db7235010b..bcb7b85254 100644 --- a/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe +++ b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test is to check transform feedback from TES while 32 outputs are fully used and they are scalarized to 128 ; components. Therefore, in this case, we have 128 transform feedback write calls. This test is to check if we can ; pack such trasnform feedback outputs correctly in LDS space on GFX11+. 
@@ -8,24 +16,24 @@ ; SHADERTEST-LABEL: LLPC HW GS configurations ; SHADERTEST: EsGsRingItemSize = 129 dwords -; SHADERTEST-LABEL: .fetchXfbOutput +; SHADERTEST-LABEL: .writeXfb ; Write v4[31] = 4.0 -> LDS ; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr {{i8|i32}}, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} -; SHADERTEST-NEXT: store i32 1082130432, ptr addrspace(3) [[ldsPtr1]], align 4 +; SHADERTEST-NEXT: store float 4.000000e+00, ptr addrspace(3) [[ldsPtr1]], align 4 ; Write v3[31] = 3.0 -> LDS ; SHADERTEST: [[ldsPtr2:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 128 -; SHADERTEST-NEXT: store i32 1077936128, ptr addrspace(3) [[ldsPtr2]], align 4 +; SHADERTEST-NEXT: store float 3.000000e+00, ptr addrspace(3) [[ldsPtr2]], align 4 ; Write v2[31] = 2.0 -> LDS ; SHADERTEST: [[ldsPtr3:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 256 -; SHADERTEST-NEXT: store i32 1073741824, ptr addrspace(3) [[ldsPtr3]], align 4 +; SHADERTEST-NEXT: store float 2.000000e+00, ptr addrspace(3) [[ldsPtr3]], align 4 ; Write v1[31] = 1.0 -> LDS ; SHADERTEST: [[ldsPtr4:%[0-9]*]] = getelementptr i8, ptr addrspace(3) %{{[0-9]*}}, i32 384 -; SHADERTEST-NEXT: store i32 1065353216, ptr addrspace(3) [[ldsPtr4]], align 4 +; SHADERTEST-NEXT: store float 1.000000e+00, ptr addrspace(3) [[ldsPtr4]], align 4 -; SHADERTEST-LABEL: .exportXfbOutput +; SHADERTEST-LABEL: .exportXfbInVertex0 ; Read v4[31] <- LDS ; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr {{i8|i32}}, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} ; SHADERTEST-NEXT: [[v4:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr1]], align 4 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_ColorExportShader.pipe b/llpc/test/shaderdb/general/PipelineVsFs_ColorExportShader.pipe index 0b3de54a08..20c93db9a4 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_ColorExportShader.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_ColorExportShader.pipe @@ -1,3 +1,11 @@ +;; + 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -filetype=asm -o - %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DisableFMA.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DisableFMA.pipe index 10a91d7161..7864eda082 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DisableFMA.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DisableFMA.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Disable FMA ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe index b01ed348fa..472233c4c0 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function-signature ; RUN: amdllpc -stop-after=lgc-mutate-entry-point -o - %s | FileCheck -check-prefixes=SHADERTEST %s [Version] diff --git a/llpc/test/shaderdb/general/PipelineVsFs_FsWithData.pipe b/llpc/test/shaderdb/general/PipelineVsFs_FsWithData.pipe index 89db2c7336..2525e4f422 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_FsWithData.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_FsWithData.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that constant data in the fragment shader is handled correctly. ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s && llvm-objdump --arch=amdgcn --disassemble-zeroes --mcpu=gfx1010 -d -j .text -j .rodata -r %t.elf | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_GlPositionFMF.pipe b/llpc/test/shaderdb/general/PipelineVsFs_GlPositionFMF.pipe index 9eec0abd6e..3b024970c2 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_GlPositionFMF.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_GlPositionFMF.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Ensure that fast math flags are removed early enough to prevent // instruction combine removing subtraction for gl_Position computation. diff --git a/llpc/test/shaderdb/general/PipelineVsFs_MultiTableDescSet.pipe b/llpc/test/shaderdb/general/PipelineVsFs_MultiTableDescSet.pipe index 181a5e6e18..8768cecf17 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_MultiTableDescSet.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_MultiTableDescSet.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test has three resources in descriptor set 0, but each is in a // different descriptor table. The test checks that all three are loaded correctly. diff --git a/llpc/test/shaderdb/general/PipelineVsFs_NullFragmentShader.pipe b/llpc/test/shaderdb/general/PipelineVsFs_NullFragmentShader.pipe index 0aaebb6044..d6fc080a0e 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_NullFragmentShader.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_NullFragmentShader.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the null fragment shader is generated correctly for a graphics pipeline where the shader is not provided. 
; BEGIN_GEN_RELOC diff --git a/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe b/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe index 7f552f8686..558bfa4b7a 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: define dllexport amdgpu_ps void @_amdgpu_ps_main( diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_line_list.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_line_list.pipe index cc1b9c4d81..7f4562f8ef 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_line_list.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_line_list.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test tests barycentric coordinate when topology is VK_PRIMITIVE_TOPOLOGY_LINE_LIST ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_fan.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_fan.pipe index 5bad45c791..204cbf21df 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_fan.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_fan.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function amdgpu_ps_main ; RUN: amdllpc -filetype=asm -gfxip=10.3 -o - %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_list.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_list.pipe index ae87997a8f..d79ef6666b 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_list.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestBarycentric_tri_list.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test tests barycentric coordinate when topology is VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestColorFormat_A8.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestColorFormat_A8.pipe index cd2a1d69e7..f2d59d0a7a 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestColorFormat_A8.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestColorFormat_A8.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata ; Test color export format: VK_FORMAT_A8_UNORM_KHR which must contain alpha channel. diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestConstImmediateStore.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestConstImmediateStore.pipe index c6a4c91138..a13819236c 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestConstImmediateStore.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestConstImmediateStore.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend.pipe index f5470d6033..f47e975d4f 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend_onlyOneRTExport.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend_onlyOneRTExport.pipe index de3e0fe3ab..4e813681c0 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend_onlyOneRTExport.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestDualSourceBlend_onlyOneRTExport.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestExpWithRGB_UINT_PACK32.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestExpWithRGB_UINT_PACK32.pipe index 9053867ed7..75b74475f4 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestExpWithRGB_UINT_PACK32.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestExpWithRGB_UINT_PACK32.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test is to verify that exports using A2R10G10B10_UINT_PACK32 are generated // correctly. diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe index 7dda10a4b2..b6452edc72 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestInOutPacking.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestInOutPacking.pipe index 868e3c8e8a..aa01bcd3f2 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestInOutPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestInOutPacking.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST_PP0 %s ; SHADERTEST_PP0-LABEL: LLPC LGC before-lowering results diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIncludeLlvmIr.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIncludeLlvmIr.pipe index 01b4878c09..780c86cafd 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIncludeLlvmIr.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIncludeLlvmIr.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that AMDGPU.comment.llvmir is included in elf and contains module data. 
; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s -include-llvm-ir -o %t.pp0_pipe.elf diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe index 53901a975c..a10f02da68 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check the resource layout mode. In "Indirect" mode, "PushConst" resource is not in root node. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestInterpAtCentriodBarycentric.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestInterpAtCentriodBarycentric.pipe index fdfcc0df9d..d4ea65fd88 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestInterpAtCentriodBarycentric.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestInterpAtCentriodBarycentric.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // gl_BaryCoordNoPerspEXT requires different interpolant mode. 
This tests interpolateAtCentroid(gl_BaryCoordNoPerspEXT) ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe index 81db59dc1d..6672c4b4a4 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata ; RUN: amdllpc -gfxip 10.3 -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s @@ -167,8 +175,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .ps: ; CHECK-NEXT: .checksum_value: 0 ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_ps_main -; CHECK-NEXT: .float_mode: 0xc0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_ps_main +; CHECK: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestPervertexVariable.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestPervertexVariable.pipe index 97c5cc1b2f..567426cbfa 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestPervertexVariable.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestPervertexVariable.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test tests pervertex variable, the input variable is 2-dim array, we must load value for each vertex, // the index is 0-2. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestPointerInOut.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestPointerInOut.pipe index 15ccb98681..55a09e364e 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestPointerInOut.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestPointerInOut.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST_PP0 %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_First.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_First.pipe index 2d4bff3d51..30ea214e34 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_First.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_First.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Test primitiveID when provoking vertex mode is FIRST ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_Last.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_Last.pipe index 56b121d9cb..b477d1245e 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_Last.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestPrimitiveID_Last.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test primitiveID when provoking vertex mode is LAST ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestSubpassInputFmaskBased.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestSubpassInputFmaskBased.pipe index 486c361646..368463ba8f 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestSubpassInputFmaskBased.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestSubpassInputFmaskBased.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe index fce4081e0d..0f89e58907 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestUberShader.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Test uber shader ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexDivisor.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexDivisor.pipe index 7511de7f5c..24f71914e9 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexDivisor.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexDivisor.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe index 19d7b5f904..931553896d 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestVertexFetchWithR8G8.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test is to verify if vertex attribute format R8G8 is correctly converted // to per-component load to workaround a HW defect of unaligned tbuffer_load with // vertex stride less than 4. diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestViewportIndex.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestViewportIndex.pipe index 610a3d14a4..978f904138 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestViewportIndex.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestViewportIndex.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the value of SPI_PS_INPUT_CNTL_0 is set correctly. 
; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_Test_unused_outputs.pipe b/llpc/test/shaderdb/general/PipelineVsFs_Test_unused_outputs.pipe index f8c5ef348a..f79ef92458 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_Test_unused_outputs.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_Test_unused_outputs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -v -gfxip 11.0 %s | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_VsAndFsWithData.pipe b/llpc/test/shaderdb/general/PipelineVsFs_VsAndFsWithData.pipe index 07f1b406bf..c4d0c0b5e5 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_VsAndFsWithData.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_VsAndFsWithData.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that constant data in the vertex shader is handled correctly. 
; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s diff --git a/llpc/test/shaderdb/general/PipelineVsFs_VsWithData.pipe b/llpc/test/shaderdb/general/PipelineVsFs_VsWithData.pipe index 0929d8d788..4682f45c99 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_VsWithData.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_VsWithData.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that constant data in the vertex shader is handled correctly. ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s && \ diff --git a/llpc/test/shaderdb/general/PipelineVsGsFs_TestDwordPacking.pipe b/llpc/test/shaderdb/general/PipelineVsGsFs_TestDwordPacking.pipe index 39fe50e71c..b8f6de23f8 100644 --- a/llpc/test/shaderdb/general/PipelineVsGsFs_TestDwordPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineVsGsFs_TestDwordPacking.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that GS outputs get extended from 16-bit to 32-bit during in/out packing. 
; Test that unused GS outputs are removed without corresponding item in the mapping table diff --git a/llpc/test/shaderdb/general/PipelineVsGsFs_TestMergeNode.pipe b/llpc/test/shaderdb/general/PipelineVsGsFs_TestMergeNode.pipe index c036592e68..48b21d3185 100644 --- a/llpc/test/shaderdb/general/PipelineVsGsFs_TestMergeNode.pipe +++ b/llpc/test/shaderdb/general/PipelineVsGsFs_TestMergeNode.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsGs_TestBasicInOut.pipe b/llpc/test/shaderdb/general/PipelineVsGs_TestBasicInOut.pipe index 79e664771f..43e4a6ed3a 100644 --- a/llpc/test/shaderdb/general/PipelineVsGs_TestBasicInOut.pipe +++ b/llpc/test/shaderdb/general/PipelineVsGs_TestBasicInOut.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsGs_TestBuiltinInOut.pipe b/llpc/test/shaderdb/general/PipelineVsGs_TestBuiltinInOut.pipe index 945f3887d8..e692bb5885 100644 --- a/llpc/test/shaderdb/general/PipelineVsGs_TestBuiltinInOut.pipe +++ b/llpc/test/shaderdb/general/PipelineVsGs_TestBuiltinInOut.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PipelineVsPs_TestFetchRGB10A2.pipe b/llpc/test/shaderdb/general/PipelineVsPs_TestFetchRGB10A2.pipe index 83c147c5ad..1c877372b4 100644 --- a/llpc/test/shaderdb/general/PipelineVsPs_TestFetchRGB10A2.pipe +++ b/llpc/test/shaderdb/general/PipelineVsPs_TestFetchRGB10A2.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/PrintOptionsTest.spvasm b/llpc/test/shaderdb/general/PrintOptionsTest.spvasm index e8859e806c..b172c594a8 100644 --- a/llpc/test/shaderdb/general/PrintOptionsTest.spvasm +++ b/llpc/test/shaderdb/general/PrintOptionsTest.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that we can print LLVM CLI options overridden by LLPC. ; 1. Check that only the overridden options gets printed with `--print-options`. diff --git a/llpc/test/shaderdb/general/ScalarBlockLayoutOptionTest.spvasm b/llpc/test/shaderdb/general/ScalarBlockLayoutOptionTest.spvasm index 387155bd52..bdc292f460 100644 --- a/llpc/test/shaderdb/general/ScalarBlockLayoutOptionTest.spvasm +++ b/llpc/test/shaderdb/general/ScalarBlockLayoutOptionTest.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the pipeline that amdllpc will compile sets the scalar block layout to the value given by the command line ; option. 
diff --git a/llpc/test/shaderdb/general/ScheduleStrategy.pipe b/llpc/test/shaderdb/general/ScheduleStrategy.pipe new file mode 100644 index 0000000000..46ce788561 --- /dev/null +++ b/llpc/test/shaderdb/general/ScheduleStrategy.pipe @@ -0,0 +1,29 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; RUN: amdllpc --emit-llvm -v -gfxip=11.0 %s | FileCheck -check-prefix=CHECK %s + +; CHECK: {{^}}attributes #0 {{.*}} "amdgpu-sched-strategy"="max-ilp" +; CHECK: {{^}}attributes #1 {{.*}} "amdgpu-sched-strategy"="max-memory-clause" +; REQUIRES: do-not-run-me + +[VsGlsl] +#version 450 core +void main() { } + +[VsInfo] +entryPoint = main +options.scheduleStrategy = MaxIlp + +[FsGlsl] +#version 450 +void main() { } + +[FsInfo] +entryPoint = main +options.scheduleStrategy = MaxMemoryClause diff --git a/llpc/test/shaderdb/general/SubgroupShuffleIndexConstant.comp b/llpc/test/shaderdb/general/SubgroupShuffleIndexConstant.comp index 0a6d93ce97..b94bcc50bf 100644 --- a/llpc/test/shaderdb/general/SubgroupShuffleIndexConstant.comp +++ b/llpc/test/shaderdb/general/SubgroupShuffleIndexConstant.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // RUN: amdllpc -gfxip 11.0 -filetype=asm -o - %s | FileCheck -check-prefix=GFX11 %s // GFX11-NOT: ds_bpermute_b32 diff --git a/llpc/test/shaderdb/general/SubgroupShuffleIndexDivergent.comp b/llpc/test/shaderdb/general/SubgroupShuffleIndexDivergent.comp index 517f25cacb..125c086181 100644 --- a/llpc/test/shaderdb/general/SubgroupShuffleIndexDivergent.comp +++ b/llpc/test/shaderdb/general/SubgroupShuffleIndexDivergent.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // RUN: amdllpc -gfxip 11.0 -filetype=asm -o - %s | FileCheck -check-prefix=GFX11 %s // TODO: Should use Wave32 and a single ds_bpermute_b32 diff --git a/llpc/test/shaderdb/general/SubgroupShuffleIndexUniform.comp b/llpc/test/shaderdb/general/SubgroupShuffleIndexUniform.comp index 4d7cb54b95..2c7912cf9c 100644 --- a/llpc/test/shaderdb/general/SubgroupShuffleIndexUniform.comp +++ b/llpc/test/shaderdb/general/SubgroupShuffleIndexUniform.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // RUN: amdllpc -gfxip 11.0 -filetype=asm -o - %s | FileCheck -check-prefix=GFX11 %s // TODO: Should use v_readlane diff --git a/llpc/test/shaderdb/general/TessInOutWithReadBackOnlyOutputs.pipe b/llpc/test/shaderdb/general/TessInOutWithReadBackOnlyOutputs.pipe new file mode 100644 index 0000000000..43bcdb25ba --- /dev/null +++ b/llpc/test/shaderdb/general/TessInOutWithReadBackOnlyOutputs.pipe @@ -0,0 +1,250 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Test if we handle outputs of tessellation control shader correctly. Some outputs of tessellation control +; shader are not mapped to inputs of tessellation evaluation shader. They are read back only by tessellation +; control shader itself. Such outputs might have conflicts with the location assignments of built-in outputs. Since we +; always cache all outputs of tessellation control shader to on-chip LDS before writing them to off-chip LDS buffer, +; this is not a problem now. The outputs can have on-chip LDS location and off-chip LDS buffer location at the same +; time. + +; RUN: amdllpc -v -gfxip 11.0 -o /dev/null %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: {{^// LLPC}} location input/output mapping results (TES) +; SHADERTEST: (TES) Input: [location, component] = [0, 0] => Mapped = [0, 0] +; SHADERTEST: (TES) Input: [location, component] = [0, 1] => Mapped = [0, 1] +; SHADERTEST: (TES) Input: [location, component] = [0, 2] => Mapped = [0, 2] +; SHADERTEST: (TES) Input (per-patch): location = 3 => Mapped = 0 + +; SHADERTEST-LABEL: {{^// LLPC}} builtin-to-generic mapping results (TES) +; SHADERTEST: (TES) Input: builtin = Position => Mapped = 1 + +; SHADERTEST-LABEL: {{^// LLPC}} location input/output mapping results (TCS) +; SHADERTEST: (TCS) Output: [location, component] = [0, 0] => Mapped = [0, 0] +; SHADERTEST: (TCS) Output: [location, component] = [0, 1] => Mapped = [0, 1] +; SHADERTEST: (TCS) Output: [location, component] = [0, 2] => Mapped = [0, 2] +; SHADERTEST: (TCS) Output: [location, component] = [1, 0] => Mapped = [1, 0] +; SHADERTEST: (TCS) Output: [location, component] = [1, 1] => Mapped = [1, 1] +; SHADERTEST: (TCS) Output: [location, component] = [1, 2] => Mapped = [1, 2] +; SHADERTEST: (TCS) Output: [location, component] = [1, 3] => Mapped = [1, 3] +; SHADERTEST: (TCS) Output (per-patch): 
location = 2 => Mapped = 1 +; SHADERTEST: (TCS) Output (per-patch): location = 3 => Mapped = 0 + +; SHADERTEST-LABEL: {{^// LLPC}} builtin-to-generic mapping results (TCS) +; SHADERTEST: (TCS) Output: builtin = Position => Mapped = 2 + +; SHADERTEST-LABEL: {{^// LLPC}} HW tessellation configurations +; SHADERTEST: InputVertices = 4, VertexStride = 5 dwords, Size = 20 dwords +; SHADERTEST: OutputVertices = 4, VertexStride = [13, 8] dwords, Size = [52, 32] dwords +; SHADERTEST: PatchConstants = [2, 1], Size = [9, 4] dwords + +; SHADERTEST-LABEL: {{^// LLPC}} HS output write info +; SHADERTEST: Per-vertex Outputs +; SHADERTEST: location = [0, 0] +; SHADERTEST: location = [2, 1] (builtin = Position) +; SHADERTEST: Per-patch Outputs +; SHADERTEST: location = [0, 0] + +; SHADERTEST: AMDLLPC SUCCESS + +[Version] +version = 75 + +[VsGlsl] +#version 450 + +vec4 positions[8] = vec4[]( + vec4(-1.0, -1.0, 0.0, 1.0), + vec4( 1.0, -1.0, 0.0, 1.0), + vec4(-1.0, 1.0, 0.0, 1.0), + vec4( 1.0, 1.0, 0.0, 1.0), + vec4(-0.5, -0.5, 0.0, 1.0), + vec4( 0.5, -0.5, 0.0, 1.0), + vec4(-0.5, 0.5, 0.0, 1.0), + vec4( 0.5, 0.5, 0.0, 1.0) +); +out gl_PerVertex { + vec4 gl_Position; +}; +void main (void) +{ + gl_Position = positions[gl_VertexIndex]; +} + +[VsInfo] +entryPoint = main + +[TcsGlsl] +#version 450 +#extension GL_EXT_tessellation_shader : require +#extension GL_EXT_shader_explicit_arithmetic_types : enable + +layout (vertices = 4) out; + +layout(location = 0) out vec3 outColor[]; +layout(location = 1) out vec4 vertexData[]; // Read back only + +layout(location = 2) out patch float patchData; +layout(location = 3) out patch float patchColor; // Read back only + +void main () +{ + vertexData[gl_InvocationID] = gl_in[gl_InvocationID].gl_Position; + outColor[gl_InvocationID] = vertexData[gl_InvocationID].xyz; + gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; + + patchData = float(gl_InvocationID); + patchColor = patchData; + + gl_TessLevelOuter = float[4](1.0, 1.0, 
1.0, 1.0); + gl_TessLevelInner = float[2](1.0, 1.0); +} + +[TcsInfo] +entryPoint = main + +[TesGlsl] +#version 450 + +layout(quads, equal_spacing) in; + +layout(location = 0) in vec3 inColor[]; +layout(location = 3) in patch float patchColor; + +layout(location = 0) out vec4 outColor; + +void main () +{ + outColor = vec4(inColor[0], 1.0) + vec4(patchColor); + gl_Position = gl_in[0].gl_Position; +} + +[TesInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) in vec4 inColor; +layout(location = 0) out vec4 outColor; + +void main() +{ + outColor = inColor; +} + +[FsInfo] +entryPoint = main + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST +provokingVertexMode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT +patchControlPoints = 4 +deviceIndex = 0 +disableVertexReuse = 0 +switchWinding = 0 +enableMultiView = 0 +useVertexBufferDescArray = 0 +depthClipEnable = 1 +rasterizerDiscardEnable = 0 +innerCoverage = 0 +perSampleShading = 0 +numSamples = 1 +pixelShaderSamples = 0 +samplePatternIdx = 0 +dynamicSampleInfo = 0 +rasterStream = 0 +enableMapClipDistMask = 0 +usrClipPlaneMask = 0 +alphaToCoverageEnable = 0 +dualSourceBlendEnable = 0 +dualSourceBlendDynamic = 0 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 0 +nggState.enableNgg = 1 +nggState.enableGsUse = 0 +nggState.forceCullingMode = 0 +nggState.compactVertex = 0 +nggState.enableBackfaceCulling = 0 +nggState.enableFrustumCulling = 0 +nggState.enableBoxFilterCulling = 0 +nggState.enableSphereCulling = 0 +nggState.enableSmallPrimFilter = 0 +nggState.enableCullDistanceCulling = 0 +nggState.backfaceExponent = 0 +nggState.subgroupSizing = Auto +nggState.primsPerSubgroup = 256 +nggState.vertsPerSubgroup = 256 +unlinked = 0 +dynamicVertexStride = 0 +enableUberFetchShader = 0 +enableEarlyCompile = 0 +enableColorExportShader = 0 +useSoftwareVertexBufferDescriptors = 0 
+dynamicTopology = 0 +enableColorClampVs = 0 +enableColorClampFs = 0 +enableFlatShade = 0 +alphaTestFunc = 0 +enableInitialUndefVar = 0 +shaderTraceMask = 0 +originUpperLeft = 0 +forceDisableStreamOut = 0 +vbAddressLowBitsKnown = 0 +advancedBlendInfo.enableAdvancedBlend = 0 +advancedBlendInfo.enableRov = 0 +advancedBlendInfo.binding = 0 +options.includeDisassembly = 0 +options.scalarBlockLayout = 1 +options.reconfigWorkgroupLayout = 0 +options.forceCsThreadIdSwizzling = 0 +options.includeIr = 0 +options.robustBufferAccess = 0 +options.enableRelocatableShaderElf = 0 +options.disableImageResourceCheck = 0 +options.enableScratchAccessBoundsChecks = 0 +options.enableImplicitInvariantExports = 1 +options.shadowDescriptorTableUsage = Disable +options.shadowDescriptorTablePtrHigh = 0 +options.extendedRobustness.robustBufferAccess = 0 +options.extendedRobustness.robustImageAccess = 0 +options.extendedRobustness.nullDescriptor = 0 +options.enableRayQuery = 0 +options.optimizeTessFactor = 1 +options.enableInterpModePatch = 0 +options.pageMigrationEnabled = 0 +options.optimizationLevel = 2 +options.overrideThreadGroupSizeX = 0 +options.overrideThreadGroupSizeY = 0 +options.overrideThreadGroupSizeZ = 0 +options.resourceLayoutScheme = Compact +options.threadGroupSwizzleMode = Default +options.reverseThreadGroup = 0 +options.internalRtShaders = 0 +options.forceNonUniformResourceIndexStageMask = 0 +options.expertSchedulingMode = 0 +options.glState.replaceSetWithResourceType = 0 +options.glState.disableSampleMask = 0 +options.glState.buildResourcesDataForShaderModule = 0 +options.glState.disableTruncCoordForGather = 1 +options.glState.enableCombinedTexture = 0 +options.glState.vertex64BitsAttribSingleLoc = 0 +options.glState.enableFragColor = 0 +options.glState.disableBaseVertex = 0 +options.glState.enablePolygonStipple = 0 +options.glState.enableLineSmooth = 0 +options.glState.emulateWideLineStipple = 0 +options.glState.enablePointSmooth = 0 +options.glState.enableRemapLocation = 
0 +options.cacheScopePolicyControl = 0 +options.temporalHintControl = 0x777777 +options.enablePrimGeneratedQuery = 1 +options.disablePerCompFetch = 0 +options.optimizePointSizeWrite = 1 +options.padBufferSizeToNextDword = 0 diff --git a/llpc/test/shaderdb/general/TestBuiltinFrexpLdexp.comp b/llpc/test/shaderdb/general/TestBuiltinFrexpLdexp.comp index 32d13ffded..3ccee56292 100644 --- a/llpc/test/shaderdb/general/TestBuiltinFrexpLdexp.comp +++ b/llpc/test/shaderdb/general/TestBuiltinFrexpLdexp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 core #extension GL_AMD_gpu_shader_int16 : enable #extension GL_AMD_gpu_shader_half_float : enable diff --git a/llpc/test/shaderdb/general/TestCombineOfMultipleStoreInstructions.frag b/llpc/test/shaderdb/general/TestCombineOfMultipleStoreInstructions.frag index 7036a95122..149bbf9ba1 100644 --- a/llpc/test/shaderdb/general/TestCombineOfMultipleStoreInstructions.frag +++ b/llpc/test/shaderdb/general/TestCombineOfMultipleStoreInstructions.frag @@ -1,4 +1,11 @@ #version 460 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // RUN: amdllpc -verify-ir -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/general/TestCompilationOfNestedStructTaskPayload.spvasm b/llpc/test/shaderdb/general/TestCompilationOfNestedStructTaskPayload.spvasm index ce53df1284..cea616d3ea 100644 --- a/llpc/test/shaderdb/general/TestCompilationOfNestedStructTaskPayload.spvasm +++ b/llpc/test/shaderdb/general/TestCompilationOfNestedStructTaskPayload.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v -gfxip=11.0 %s | FileCheck %s ; CHECK-LABEL: {{^}}===== AMDLLPC SUCCESS ===== diff --git a/llpc/test/shaderdb/general/TestComponentIndexing.tese b/llpc/test/shaderdb/general/TestComponentIndexing.tese index fc899bcf73..935da3c84d 100644 --- a/llpc/test/shaderdb/general/TestComponentIndexing.tese +++ b/llpc/test/shaderdb/general/TestComponentIndexing.tese @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + // BEGIN_SHADERTEST // This test is to verify input mapping of TES when component indexing is encountered. In such // case, we are supposed to reserve all components of locations corresponding to a TES input diff --git a/llpc/test/shaderdb/general/TestConstantImmStore_FunctionInline.frag b/llpc/test/shaderdb/general/TestConstantImmStore_FunctionInline.frag index d4278fcd82..ad74b61b77 100644 --- a/llpc/test/shaderdb/general/TestConstantImmStore_FunctionInline.frag +++ b/llpc/test/shaderdb/general/TestConstantImmStore_FunctionInline.frag @@ -1,4 +1,11 @@ #version 460 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_EXT_samplerless_texture_functions : require // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/TestDeduplicateConstTables.frag b/llpc/test/shaderdb/general/TestDeduplicateConstTables.frag index c58fdff4b7..a9b3b983f9 100644 --- a/llpc/test/shaderdb/general/TestDeduplicateConstTables.frag +++ b/llpc/test/shaderdb/general/TestDeduplicateConstTables.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s // SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/general/TestDeduplicateConstTables.spvasm b/llpc/test/shaderdb/general/TestDeduplicateConstTables.spvasm index 778ba2a295..f528e7a1e8 100644 --- a/llpc/test/shaderdb/general/TestDeduplicateConstTables.spvasm +++ b/llpc/test/shaderdb/general/TestDeduplicateConstTables.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh b/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh index a9232b340b..49d7647934 100644 --- a/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh +++ b/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + // This test is to verify we calculate correct component counts for the outputs when the component // accessing is from large component indices to small ones. Make sure we use the max value to finally // determine the correct component count for an output. diff --git a/llpc/test/shaderdb/general/TestPatchBufferOp.comp b/llpc/test/shaderdb/general/TestPatchBufferOp.comp index 38f271aa03..3a9428846e 100644 --- a/llpc/test/shaderdb/general/TestPatchBufferOp.comp +++ b/llpc/test/shaderdb/general/TestPatchBufferOp.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // This test case checks whether a phi is well-handed in LowerBufferOperations pass. This shader will result in a phi in IR that // one of the incoming value comes from downstream of the control flow. // BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp index e7668ced75..12196f26d9 100644 --- a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp +++ b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function-signature --check-globals // RUN: amdllpc -o - -gfxip 10.3 -emit-llvm %s | FileCheck -check-prefixes=CHECK %s // This test verifies the optimization on workgroupId works well (i.e., lgc.shader.input.workgroupId.i32 is used not lgc.shader.input.workgroupId.v3i32) diff --git a/llpc/test/shaderdb/general/TestWorkgroupMemoryLayout.spvasm b/llpc/test/shaderdb/general/TestWorkgroupMemoryLayout.spvasm index 6552854d0a..26f9fcd89b 100644 --- a/llpc/test/shaderdb/general/TestWorkgroupMemoryLayout.spvasm +++ b/llpc/test/shaderdb/general/TestWorkgroupMemoryLayout.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test workgroup memory explicit layout. Workgroup variables can be declared in blocks, and then use the same ; explicit layout decorations (e.g. Offset, ArrayStride) as other storage classes; All the Workgroup blocks share ; the same underlying storage and either all or none of the variables must be explicitly laid out. diff --git a/llpc/test/shaderdb/general/UndefVertexOutput.spvasm b/llpc/test/shaderdb/general/UndefVertexOutput.spvasm index 4e4fb93e04..c9b39abb65 100644 --- a/llpc/test/shaderdb/general/UndefVertexOutput.spvasm +++ b/llpc/test/shaderdb/general/UndefVertexOutput.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -filetype=asm -gfxip 10.1 %s | FileCheck -check-prefixes=CHECK %s @@ -72,29 +80,34 @@ ; CHECK-NEXT: s_sendmsg sendmsg(MSG_GS_ALLOC_REQ) ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0 -; CHECK-NEXT: v_lshl_or_b32 v1, s3, 5, v1 -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, s4, v1 +; CHECK-NEXT: v_lshl_or_b32 v6, s3, 5, v1 +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, s1, v6 ; CHECK-NEXT: s_and_saveexec_b32 s2, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB0_4 -; CHECK-NEXT: exp prim v0, off, off, off done +; CHECK-NEXT: s_getpc_b64 s[6:7] +; CHECK-NEXT: v_add_nc_u32_e32 v1, s10, v5 +; CHECK-NEXT: s_mov_b32 s1, s7 +; CHECK-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_load_format_xyzw v[1:4], v1, s[12:15], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: s_waitcnt_depctr 0xffe3 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s2 -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, s1, v1 -; CHECK-NEXT: s_and_saveexec_b32 s1, vcc_lo +; CHECK-NEXT: v_cmp_gt_u32_e64 s0, s4, v6 +; CHECK-NEXT: s_and_saveexec_b32 s1, s0 ; CHECK-NEXT: s_cbranch_execz .LBB0_6 -; CHECK-NEXT: s_getpc_b64 s[2:3] -; CHECK-NEXT: v_add_nc_u32_e32 v0, s10, v5 -; CHECK-NEXT: s_mov_b32 s1, s3 -; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x10 -; CHECK-NEXT: v_mov_b32_e32 v5, 1.0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: exp pos0 v0, v1, v2, v3 done -; CHECK-NEXT: exp param3 off, v4, off, off -; CHECK-NEXT: exp param1 v4, v4, v5, v5 -; CHECK-NEXT: exp param2 v5, v4, v4, v5 +; CHECK-NEXT: exp prim 
v0, off, off, off done ; CHECK-NEXT: .LBB0_6: +; CHECK-NEXT: s_waitcnt expcnt(0) +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; CHECK-NEXT: s_and_saveexec_b32 s0, vcc_lo +; CHECK-NEXT: s_cbranch_execz .LBB0_8 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp pos0 v1, v2, v3, v4 done +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v5, 1.0 +; CHECK-NEXT: exp param3 off, v0, off, off +; CHECK-NEXT: exp param1 v0, v0, v5, v5 +; CHECK-NEXT: exp param2 v5, v0, v0, v5 +; CHECK-NEXT: .LBB0_8: ; CHECK-NEXT: s_endpgm diff --git a/llpc/test/shaderdb/general/VertexOptimizationLevelTest.spvasm b/llpc/test/shaderdb/general/VertexOptimizationLevelTest.spvasm index 8f333d63e4..a6b8814e88 100644 --- a/llpc/test/shaderdb/general/VertexOptimizationLevelTest.spvasm +++ b/llpc/test/shaderdb/general/VertexOptimizationLevelTest.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that enabling pipeline dumps produces the expected files (.pipe, .spv, and .elf). ; Test contains a lot of non-windows commands, so excluding windows from testing diff --git a/llpc/test/shaderdb/general/VertexPipelineDumpTest.spvasm b/llpc/test/shaderdb/general/VertexPipelineDumpTest.spvasm index e8501ccb1d..c54d524b37 100644 --- a/llpc/test/shaderdb/general/VertexPipelineDumpTest.spvasm +++ b/llpc/test/shaderdb/general/VertexPipelineDumpTest.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that enabling pipeline dumps produces the expected files (.pipe, .spv, and .elf). ; Create a fresh directory for pipeline dump files. diff --git a/llpc/test/shaderdb/general/VertexTimerProfileTest.spvasm b/llpc/test/shaderdb/general/VertexTimerProfileTest.spvasm index e02218c00f..dfe67db555 100644 --- a/llpc/test/shaderdb/general/VertexTimerProfileTest.spvasm +++ b/llpc/test/shaderdb/general/VertexTimerProfileTest.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that timer profile and pipeline info printing works with shader inputs. ; RUN: amdllpc -v %gfxip %s --enable-timer-profile >%t.stdout 2>%t.stderr \ diff --git a/llpc/test/shaderdb/general/VsFsPipelineDumpTest.pipe b/llpc/test/shaderdb/general/VsFsPipelineDumpTest.pipe index 9dd25a5785..d95859ffb3 100644 --- a/llpc/test/shaderdb/general/VsFsPipelineDumpTest.pipe +++ b/llpc/test/shaderdb/general/VsFsPipelineDumpTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that a graphics pipeline dump can be correctly recompiled. ; Create a fresh directory for pipeline dump files. 
diff --git a/llpc/test/shaderdb/general/VsFsTimerProfileTest.pipe b/llpc/test/shaderdb/general/VsFsTimerProfileTest.pipe index 82057e0c56..f94461816b 100644 --- a/llpc/test/shaderdb/general/VsFsTimerProfileTest.pipe +++ b/llpc/test/shaderdb/general/VsFsTimerProfileTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that timer profile and pipeline info printing works for pipe inputs. ; RUN: amdllpc -v %gfxip %s --enable-timer-profile >%t.stdout 2>%t.stderr \ diff --git a/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe b/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe index 6402cdd457..90d0778024 100644 --- a/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe +++ b/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -gfxip 11.0 -emit-lgc -o - %s | FileCheck --check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/general/WorkgroupSizeLiteral.spvasm b/llpc/test/shaderdb/general/WorkgroupSizeLiteral.spvasm index 88dc7e53af..7b3a4f4150 100644 --- a/llpc/test/shaderdb/general/WorkgroupSizeLiteral.spvasm +++ b/llpc/test/shaderdb/general/WorkgroupSizeLiteral.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC before-lowering results diff --git a/llpc/test/shaderdb/general/outputArray.frag b/llpc/test/shaderdb/general/outputArray.frag new file mode 100644 index 0000000000..d8945e8918 --- /dev/null +++ b/llpc/test/shaderdb/general/outputArray.frag @@ -0,0 +1,26 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + +// This test is to verify fragment outputs of array type. +// We must build color targets for all elements when enabling -auto-layout-desc. 
+ +// BEGIN_SHADERTEST +/* +; RUN: amdllpc -v --verify-ir %gfxip %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST: AMDLLPC SUCCESS +*/ +// END_SHADERTEST + +#version 450 + +layout(location = 0) out vec4 o[8]; +void main() +{ + o[0] = vec4(1.0, 0.0, 0.0, 1.0); + o[2] = vec4(0.0, 1.0, 0.0, 1.0); + o[7] = vec4(0.0, 0.0, 1.0, 1.0); +} diff --git a/llpc/test/shaderdb/gfx10/CheckFMFOptions_NoContract.pipe b/llpc/test/shaderdb/gfx10/CheckFMFOptions_NoContract.pipe index 0283a4e38b..5c76e89a04 100644 --- a/llpc/test/shaderdb/gfx10/CheckFMFOptions_NoContract.pipe +++ b/llpc/test/shaderdb/gfx10/CheckFMFOptions_NoContract.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test checks that setting disableFastMathFlags to 32 (1<<5 == AllowContract) // does actually stop contract (no fma instructions are formed). diff --git a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVs.pipe b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVs.pipe index 24b2e80f36..b239bbdd3d 100644 --- a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVs.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v --gfxip=10.3.0 %s | FileCheck -check-prefix=SHADERTEST %s ; Check the merged GS has acquired GS amdgpu-num-vgpr, VS amdgpu-nsa-threshold. 
diff --git a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVsNgg.pipe b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVsNgg.pipe index 4eccf1e0bc..7159f5d2f9 100644 --- a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVsNgg.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_GsVsNgg.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v --gfxip=10.3.0 %s | FileCheck -check-prefix=SHADERTEST %s ; Check the merged GS has acquired GS amdgpu-num-vgpr, VS amdgpu-nsa-threshold. diff --git a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_HsLs.pipe b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_HsLs.pipe index c5d0210708..dd4d8a80b9 100644 --- a/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_HsLs.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineMergeAttributes_HsLs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v --gfxip=10.3.0 %s | FileCheck -check-prefix=SHADERTEST %s ; Check the merged HS has acquired HS amdgpu-nsa-threshold, LS amdgpu-num-vgpr. 
diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestFetchSingleInputNgg.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestFetchSingleInputNgg.pipe index 14a711b8f8..03c30edca0 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestFetchSingleInputNgg.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestFetchSingleInputNgg.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that a fetch shader for 1 input is not crashing with NGG. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageFragment.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageFragment.pipe index 16b7924816..d4ed7bea23 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageFragment.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageFragment.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that gl_SubgroupSize usage results in consistent wave size between stages. 
; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageVertex.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageVertex.pipe index ca28c358df..0e83b10907 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageVertex.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestSubgroupSizeUsageVertex.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that gl_SubgroupSize usage results in consistent wave size between stages. ; RUN: amdllpc -enable-part-pipeline=0 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe index e12fc9384e..f70e9ecfd5 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata ; Test that VS_OUT_MISC_SIDE_BUS_ENA is set true correctly. 
diff --git a/llpc/test/shaderdb/gfx10/TestWaveSize.comp b/llpc/test/shaderdb/gfx10/TestWaveSize.comp index 0e65b96ef4..192140362a 100644 --- a/llpc/test/shaderdb/gfx10/TestWaveSize.comp +++ b/llpc/test/shaderdb/gfx10/TestWaveSize.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 // Test that wavefront size is 32. 
diff --git a/llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe b/llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe index 21e067e179..69bf2165b5 100644 --- a/llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe +++ b/llpc/test/shaderdb/gfx10/WgpModeDisabled.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: LLPC final pipeline module info ; SHADERTEST: {{^}}attributes {{.*}}+cumode diff --git a/llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe b/llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe index c0e7dbc631..e966d2ee42 100644 --- a/llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe +++ b/llpc/test/shaderdb/gfx10/WgpModeEnabled.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: LLPC final pipeline module info ; SHADERTEST: {{^}}attributes {{.*}}-cumode diff --git a/llpc/test/shaderdb/gfx10/lit.local.cfg b/llpc/test/shaderdb/gfx10/lit.local.cfg index c839f74489..9c7e14394d 100644 --- a/llpc/test/shaderdb/gfx10/lit.local.cfg +++ b/llpc/test/shaderdb/gfx10/lit.local.cfg @@ -1,3 +1,27 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### # overwrite %gfxip in config.substitutions config.gfxip = '-gfxip=10.3' diff --git a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe index f631569161..f60fce891e 100644 --- a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe +++ b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test to check that attributes-through-memory precedes vertex position data exporting ; RUN: amdllpc %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/gfx11/ExtSubgroup_TestReduction.comp b/llpc/test/shaderdb/gfx11/ExtSubgroup_TestReduction.comp index 7c4c26b876..34f599bb27 100644 --- a/llpc/test/shaderdb/gfx11/ExtSubgroup_TestReduction.comp +++ b/llpc/test/shaderdb/gfx11/ExtSubgroup_TestReduction.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_KHR_shader_subgroup_clustered: enable #extension GL_KHR_shader_subgroup_ballot: enable diff --git a/llpc/test/shaderdb/gfx11/FlatParamDpp.frag b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag index 87608e2865..664f2e3fc1 100644 --- a/llpc/test/shaderdb/gfx11/FlatParamDpp.frag +++ b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that flat parameter load uses DPP in strict WQM // RUN: amdllpc %gfxip --v %s |\ diff --git a/llpc/test/shaderdb/gfx11/HalfAttribute.frag b/llpc/test/shaderdb/gfx11/HalfAttribute.frag index 9476c58328..c0c69915a2 100644 --- a/llpc/test/shaderdb/gfx11/HalfAttribute.frag +++ b/llpc/test/shaderdb/gfx11/HalfAttribute.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Check that f16 attribute was interpolated using rtz intrinsic. 
// RUN: amdllpc %gfxip --v %s |\ diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe index 8dc598c124..7f7b790dae 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Cs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata ; Test to check that dummy sgpr user data registers are inserted when required ; This test checks wave32 compute shaders @@ -160,8 +168,8 @@ options.threadGroupSwizzleMode = Default ; CHECK-NEXT: .cs: ; CHECK-NEXT: .checksum_value: 0x{{[0-9a-f]+}} ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_cs_main -; CHECK-NEXT: .excp_en: 0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_cs_main +; CHECK: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: false diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe index 3942458c66..8d37969110 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata ; Test to check that dummy sgpr user data registers are inserted when required ; This test checks wave32 fragment/pixel shaders @@ -102,10 +110,10 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; CHECK-NEXT: s_and_b32 s0, s0, 0x3c0 ; CHECK-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; CHECK-NEXT: v_add_nc_u32_e32 v1, s0, v1 ; CHECK-NEXT: s_bfe_u32 s0, s2, 0x90016 -; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; CHECK-NEXT: s_bfe_u32 s2, s2, 0x9000c ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s0, v1 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc ; CHECK-NEXT: s_cbranch_execz .LBB0_2 @@ -113,10 +121,9 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: s_waitcnt expcnt(0) ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] -; CHECK-NEXT: s_bfe_u32 s0, s2, 0x9000c ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s0, v1 -; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc +; CHECK-NEXT: s_mov_b64 s[0:1], exec +; CHECK-NEXT: v_cmpx_gt_u32_e64 s2, v1 ; CHECK-NEXT: s_cbranch_execz .LBB0_4 ; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 @@ -292,8 +299,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .gs: ; CHECK-NEXT: .checksum_value: 0x3e7a1455 ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_gs_main -; CHECK-NEXT: .float_mode: 0xc0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_gs_main +; CHECK: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0 @@ 
-345,8 +352,8 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .ps: ; CHECK-NEXT: .checksum_value: 0x2cbaf88c ; CHECK-NEXT: .debug_mode: false -; CHECK-NEXT: .entry_point{{(_symbol)?}}: _amdgpu_ps_main -; CHECK-NEXT: .float_mode: 0xc0 +; CHECK: .entry_point{{(_symbol)?}}: _amdgpu_ps_main +; CHECK: .float_mode: 0xc0 ; CHECK-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: true ; CHECK: .mem_ordered: true diff --git a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe index 46aa926a17..0bcb870b4d 100644 --- a/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe +++ b/llpc/test/shaderdb/gfx11/TessFactorStoreWithOpt.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test to check that the optimization of tessellation factors store are handled as expected ; RUN: amdllpc %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: @_amdgpu_hs_main( @@ -104,7 +112,7 @@ ; SHADERTEST-LABEL: .endHandleMultiWave: ; SHADERTEST-NEXT: %isAllOnesTf = phi i1 [ %isAllOnesTfInWave, %.endCheckSpecialTfInWave ], [ true, %.handleMultiWave ], [ %[[ALL_ONES_IN_GROUP]], %.checkSpecialTfInGroup ] ; SHADERTEST-NEXT: %isAllZerosTf = phi i1 [ %isAllZerosTfInWave, %.endCheckSpecialTfInWave ], [ true, %.handleMultiWave ], [ %[[ALL_ZEROS_IN_GROUP]], %.checkSpecialTfInGroup ] -; SHADERTEST-NEXT: br i1 %validHsPatch, label %.tryStoreTf, label %.endTryStoreHsOutputs +; SHADERTEST-NEXT: br i1 %validHsPatch, label %.tryStoreTf, label %.endTryStoreTf ; SHADERTEST-LABEL: .tryStoreTf: ; SHADERTEST-NEXT: %isSpecialTf = or i1 %isAllOnesTf, %isAllZerosTf @@ -112,20 +120,30 @@ ; SHADERTEST-LABEL: .checkSendTfMessage: ; 
SHADERTEST-NEXT: %[[FIRST_WAVE:[^ ,]*]] = icmp eq i32 %waveIdInGroup, 0 -; SHADERTEST-NEXT: br i1 %[[FIRST_WAVE]], label %.sendTfMessage, label %.endTryStoreHsOutputs +; SHADERTEST-NEXT: br i1 %[[FIRST_WAVE]], label %.sendTfMessage, label %.endTryStoreTf ; SHADERTEST-LABEL: .sendTfMessage: ; SHADERTEST-NEXT: %[[IS_ALL_ONES_TF:[^ ,]*]] = zext i1 %isAllOnesTf to i32 ; SHADERTEST-NEXT: call void @llvm.amdgcn.s.sendmsg(i32 2, i32 %[[IS_ALL_ONES_TF]]) -; SHADERTEST-NEXT: br label %.endTryStoreHsOutputs +; SHADERTEST-NEXT: br label %.endTryStoreTf ; SHADERTEST-LABEL: .storeTf: -; SHADERTEST: %tfBufferDescPtr = getelementptr i8, ptr addrspace(4) %globalTablePtr, i64 144 +; SHADERTEST-NEXT: %tfBufferDescPtr = getelementptr i8, ptr addrspace(4) %globalTablePtr, i64 144 ; SHADERTEST-NEXT: %tfBufferDesc = load <4 x i32>, ptr addrspace(4) %tfBufferDescPtr, align 16 ; SHADERTEST-NEXT: %[[OUTER_TF_OFFSET:[^ ,]*]] = mul i32 %threadIdInGroup, 24 ; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.buffer.store.v4f32{{(.v4i32)?}}(<4 x float> %outerTf, <4 x i32> %tfBufferDesc, i32 %[[OUTER_TF_OFFSET]], i32 %tfBufferBase, i32 1) ; SHADERTEST-NEXT: %[[INNER_TF_OFFSET:[^ ,]*]] = add i32 %[[OUTER_TF_OFFSET]], 16 ; SHADERTEST-NEXT: call void @llvm.amdgcn.raw.buffer.store.v2f32{{(.v4i32)?}}(<2 x float> %innerTf, <4 x i32> %tfBufferDesc, i32 %[[INNER_TF_OFFSET]], i32 %tfBufferBase, i32 1) +; SHADERTEST-NEXT: br label %.endTryStoreTf + +; SHADERTEST-LABEL: .endTryStoreTf: +; SHADERTEST-NEXT: %[[HS_VERTEX_COUNT:[^ ,]*]] = lshr i32 %mergeWaveInfo, 8 +; SHADERTEST-NEXT: %hsVertexCount = and i32 %[[HS_VERTEX_COUNT]], 255 +; SHADERTEST-NEXT: %validHsVertex = icmp ult i32 %threadIdInWave, %hsVertexCount +; SHADERTEST-NEXT: br i1 %validHsVertex, label %.tryStoreHsOutputs, label %.endTryStoreHsOutputs + +; SHADERTEST-LABEL: .tryStoreHsOutputs: +; SHADERTEST-NEXT: call void @llvm.amdgcn.s.setreg(i32 6401, i32 3) ; SHADERTEST-NEXT: br label %.endTryStoreHsOutputs ; SHADERTEST-LABEL: .endTryStoreHsOutputs: 
diff --git a/llpc/test/shaderdb/gfx11/TestGdsOperationsForXfb.vert b/llpc/test/shaderdb/gfx11/TestGdsOperationsForXfb.vert index d1d3d65d68..dd908f4175 100644 --- a/llpc/test/shaderdb/gfx11/TestGdsOperationsForXfb.vert +++ b/llpc/test/shaderdb/gfx11/TestGdsOperationsForXfb.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Test to check GDS operations that are required to support GFX11 transform feedback. Also, check // ds_ordered_count is followed by s_waitcnt lgkmcnt(0), which is required by HW on GFX11. @@ -7,19 +14,31 @@ // SHADERTEST: .prepareXfb: // SHADERTEST: [[orderedWaveId0:%.*]] = inttoptr i32 %orderedWaveId to ptr addrspace(2) // SHADERTEST-NEXT: call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) [[orderedWaveId0]], i32 0, i32 0, i32 0, i1 false, i32 16777216, i1 false, i1 false) +// SHADERTEST-NEXT: fence syncscope("workgroup") release // SHADERTEST: call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %{{.*}}, i32 0) +// SHADERTEST-NEXT: fence syncscope("workgroup") release // SHADERTEST-NEXT: call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 0, i32 4) +// SHADERTEST-NEXT: fence syncscope("workgroup") release // SHADERTEST: [[orderedWaveId1:%.*]] = inttoptr i32 %orderedWaveId to ptr addrspace(2) // SHADERTEST-NEXT: call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) [[orderedWaveId1]], i32 %{{.*}}, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true) +// SHADERTEST-NEXT: fence syncscope("workgroup") release +// SHADERTEST: call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %primCountInSubgroup, i32 32) +// SHADERTEST-NEXT: fence syncscope("workgroup") release +// SHADERTEST-NEXT: call i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %77, i32 36) +// 
SHADERTEST-NEXT: fence syncscope("workgroup") release // SHADERTEST-LABEL: {{^// LLPC}} final ELF info // SHADERTEST: ds_ordered_count {{v[0-9]*}}, {{v[0-9]*}} gds // SHADERTEST: s_waitcnt lgkmcnt(0) -// SHADERTEST: ds_add_gs_reg_rtn {{v[[0-9]*:[0-9]*]}}, {{v[0-9]*}} gds ; D9EA0000 03000300 -// SHADERTEST: s_waitcnt lgkmcnt(0) ; BF89FC07 -// SHADERTEST: ds_add_gs_reg_rtn {{v[[0-9]*:[0-9]*]}}, {{v[0-9]*}} offset:4 gds ; D9EA0004 04000600 +// SHADERTEST: ds_add_gs_reg_rtn v[{{[0-9]*}}:{{[0-9]*}}], {{v[0-9]*}} gds +// SHADERTEST: s_waitcnt lgkmcnt(0) +// SHADERTEST: ds_add_gs_reg_rtn v[{{[0-9]*}}:{{[0-9]*}}], {{v[0-9]*}} offset:4 gds // SHADERTEST: s_waitcnt lgkmcnt(0) // SHADERTEST: ds_ordered_count {{v[0-9]*}}, {{v[0-9]*}} offset:772 gds +// SHADERTEST: s_waitcnt expcnt(0) lgkmcnt(0) +// SHADERTEST: ds_add_gs_reg_rtn v[{{[0-9]*}}:{{[0-9]*}}], {{v[0-9]*}} offset:32 gds +// SHADERTEST: s_waitcnt lgkmcnt(0) +// SHADERTEST: ds_add_gs_reg_rtn v[{{[0-9]*}}:{{[0-9]*}}], {{v[0-9]*}} offset:36 gds // SHADERTEST: s_waitcnt lgkmcnt(0) #version 450 core diff --git a/llpc/test/shaderdb/gfx11/TestGsXfbWithHole.pipe b/llpc/test/shaderdb/gfx11/TestGsXfbWithHole.pipe index 9a46f7cdd2..d8d0f1e6df 100644 --- a/llpc/test/shaderdb/gfx11/TestGsXfbWithHole.pipe +++ b/llpc/test/shaderdb/gfx11/TestGsXfbWithHole.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test to check GS output handling with XFB on GFX11. The XFB has a hole by specifying the component ; qualifier. In a location, only part of its components are exported to XFB buffer and they are not ; consecutive. 
diff --git a/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp b/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp index e85d669f8e..aa2073c501 100644 --- a/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp +++ b/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s @@ -35,16 +60,16 @@ void main() { // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) (...) @lgc.create.load.push.constants.ptr.p4() // CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) // CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP1]], i32 16, i32 0) -// CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) +// CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) // CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP1]], i32 16, i32 32) -// CHECK-NEXT: [[LOAD2:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP3]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) +// CHECK-NEXT: [[LOAD2:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP3]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 2 // CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP11:%.*]] // CHECK: 6: // CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP4]], 1 // CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], <8 x float> [[LOAD2]], <8 x float> [[LOAD]] -// CHECK-NEXT: [[TMP9:%.*]] = call half (...) @lgc.cooperative.matrix.extract__f16(<8 x float> [[TMP8]], i32 3, i32 1, i32 0) +// CHECK-NEXT: [[TMP9:%.*]] = call half (...) 
@lgc.xdl.cooperative.matrix.extract__f16(<8 x float> [[TMP8]], i32 3, i32 1, i32 0) // CHECK-NEXT: [[TMP10:%.*]] = fptoui half [[TMP9]] to i32 // CHECK-NEXT: br label [[TMP11]] // CHECK: 11: @@ -57,6 +82,6 @@ void main() { // CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], <8 x float> [[LOAD2]], <8 x float> [[LOAD]] // CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP15]], <8 x float> [[TMP17]], <8 x float> zeroinitializer // CHECK-NEXT: [[TMP19:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP1]], i32 16, i32 64) -// CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) [[TMP19]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP18]], i32 16) +// CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) [[TMP19]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP18]], i32 16) // CHECK-NEXT: ret void // diff --git a/llpc/test/shaderdb/gfx11/cooperativeMatrix/extract-insert.spvasm b/llpc/test/shaderdb/gfx11/cooperativeMatrix/extract-insert.spvasm index 903928baf1..fc59f35b54 100644 --- a/llpc/test/shaderdb/gfx11/cooperativeMatrix/extract-insert.spvasm +++ b/llpc/test/shaderdb/gfx11/cooperativeMatrix/extract-insert.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s @@ -124,27 +132,27 @@ ; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 1, i32 0, i32 2) ; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP1]], i32 16, i32 0) -; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) +; CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) ; CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP0]], i32 16, i32 0) -; CHECK-NEXT: [[LOAD1:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP3]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) +; CHECK-NEXT: [[LOAD1:%.*]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP3]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) ; CHECK-NEXT: br label [[TMP4:%.*]] ; CHECK: 4: ; CHECK-NEXT: [[DOT012:%.*]] = phi i32 [ 0, [[DOTENTRY:%.*]] ], [ [[TMP12:%.*]], [[TMP7:%.*]] ] ; CHECK-NEXT: [[DOT0:%.*]] = phi <8 x float> [ undef, [[DOTENTRY]] ], [ [[TMP11:%.*]], [[TMP7]] ] -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @lgc.cooperative.matrix.length(i32 0, i32 16) +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @lgc.xdl.cooperative.matrix.length(i32 0, i32 16) ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[DOT012]], [[TMP5]] ; CHECK-NEXT: [[COND_FREEZE:%.*]] = freeze i1 [[TMP6]] ; CHECK-NEXT: br i1 [[COND_FREEZE]], label [[TMP7]], label [[TMP13:%.*]] ; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call half (...) @lgc.cooperative.matrix.extract__f16(<8 x float> [[LOAD]], i32 [[DOT012]], i32 1, i32 0) -; CHECK-NEXT: [[TMP9:%.*]] = call half (...) @lgc.cooperative.matrix.extract__f16(<8 x float> [[LOAD1]], i32 [[DOT012]], i32 1, i32 0) +; CHECK-NEXT: [[TMP8:%.*]] = call half (...) @lgc.xdl.cooperative.matrix.extract__f16(<8 x float> [[LOAD]], i32 [[DOT012]], i32 1, i32 0) +; CHECK-NEXT: [[TMP9:%.*]] = call half (...) @lgc.xdl.cooperative.matrix.extract__f16(<8 x float> [[LOAD1]], i32 [[DOT012]], i32 1, i32 0) ; CHECK-NEXT: [[TMP10:%.*]] = fmul reassoc nnan nsz arcp contract afn half [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11]] = call <8 x float> (...) @lgc.cooperative.matrix.insert__v8f32(<8 x float> [[DOT0]], half [[TMP10]], i32 [[DOT012]], i32 1, i32 0) +; CHECK-NEXT: [[TMP11]] = call <8 x float> (...) 
@lgc.xdl.cooperative.matrix.insert__v8f32(<8 x float> [[DOT0]], half [[TMP10]], i32 [[DOT012]], i32 1, i32 0) ; CHECK-NEXT: [[TMP12]] = add i32 [[DOT012]], 1 ; CHECK-NEXT: br label [[TMP4]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: 13: ; CHECK-NEXT: [[TMP14:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 2, i32 0, i32 2) ; CHECK-NEXT: [[TMP15:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP14]], i32 16, i32 0) -; CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) [[TMP15]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[DOT0]], i32 16) +; CHECK-NEXT: call void (...) @lgc.xdl.cooperative.matrix.store(ptr addrspace(7) [[TMP15]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[DOT0]], i32 16) ; CHECK-NEXT: ret void ; diff --git a/llpc/test/shaderdb/gfx11/cooperativeMatrix/loadstore-uvec4.comp b/llpc/test/shaderdb/gfx11/cooperativeMatrix/loadstore-uvec4.comp index 11f1aebfb5..dc43ac16c8 100644 --- a/llpc/test/shaderdb/gfx11/cooperativeMatrix/loadstore-uvec4.comp +++ b/llpc/test/shaderdb/gfx11/cooperativeMatrix/loadstore-uvec4.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s @@ -25,8 +50,8 @@ void main() { // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 1, i32 0, i32 2) // CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) // CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP1]], i32 16, i32 0) -// CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) +// CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.xdl.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, i32 16) // CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(7) @lgc.buffer.index(ptr addrspace(7) [[TMP0]], i32 16, i32 0) -// CHECK-NEXT: call void (...) @lgc.cooperative.matrix.store(ptr addrspace(7) [[TMP3]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[LOAD]], i32 16) +// CHECK-NEXT: call void (...) 
@lgc.xdl.cooperative.matrix.store(ptr addrspace(7) [[TMP3]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[LOAD]], i32 16) // CHECK-NEXT: ret void // diff --git a/llpc/test/shaderdb/gfx11/lit.local.cfg b/llpc/test/shaderdb/gfx11/lit.local.cfg index 457aad50b6..22bba59e30 100644 --- a/llpc/test/shaderdb/gfx11/lit.local.cfg +++ b/llpc/test/shaderdb/gfx11/lit.local.cfg @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + if "llpc_build_gfx11" not in config.available_features: config.unsupported = True diff --git a/llpc/test/shaderdb/gfx11/ray_tracing/PipelineRays_TestStaticCompile.pipe b/llpc/test/shaderdb/gfx11/ray_tracing/PipelineRays_TestStaticCompile.pipe index ca66ef5c61..24fadaee76 100644 --- a/llpc/test/shaderdb/gfx11/ray_tracing/PipelineRays_TestStaticCompile.pipe +++ b/llpc/test/shaderdb/gfx11/ray_tracing/PipelineRays_TestStaticCompile.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the ray tracing static compilation is working. ; Generating the instruction 'image_bvh64_intersect_ray' indicates the trace ray library is linked correctly. diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe index b27952230d..3717ab3036 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibCes_TestColorExport.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; Test color export shader ; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe index 192a811de4..795bc5779c 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibFs_TestFsLibrary.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s [Version] diff --git a/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe index bfcdfd0082..2826355cb6 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineLibVs_TestVsLibrary.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -filetype=asm %s | FileCheck -check-prefixes=CHECK %s [Version] diff --git a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe index 49dca41847..6b7b337d94 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -v %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm index 9c0e65a4fd..1a54d80d83 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPacking.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s -validate-spirv=false | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm index 08c6718a77..0788c1139b 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestCBufferArrayPackingFullStruct.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s -validate-spirv=false | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm index dd0e8d9b5e..27df03360b 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestLoadRowMajorMatrixInStruct.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s --validate-spirv=false | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm index 3a368ee3a4..239a308efb 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestStoreRowMajorMatrixInStruct.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s --validate-spirv=false | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm b/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm index f0702ea139..fbcdee0a1f 100644 --- a/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm +++ b/llpc/test/shaderdb/hlsl/Hlsl_TestStructuredBuffers.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s -validate-spirv=false | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} LGC lowering results diff --git a/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm b/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm index 7548f4f251..999e86a8e1 100644 --- a/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm +++ b/llpc/test/shaderdb/multiple_inputs/SpirvTwoEntryPoints.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This file contains two entry points: one vertex stage and one fragment stage. ; Check that amdllpc can compile them separately and together. diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag index 7afd5466c1..860afb3530 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs1.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // This is not the main test, just to make sure that the shader is valid. // The real check will be included in the used pipelines. 
diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag index f4cc0f2ee0..2516098fb2 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Fs2.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // This is not the main test, just to make sure that the shader is valid. // The real check will be included in the used pipelines. diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe index e105a21db2..1e3e53eb5a 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs1.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that constant data in the vertex shader is handled correctly. 
; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t_0.elf %gfxip %s diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe index b570e47228..8b5d7f8e42 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs1Fs2.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that constant data in the vertex shader is handled correctly. ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t_0.elf %gfxip %s diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe index 05cad7e5fb..255a8d2193 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function _amdgpu_ps_main ; Test that constant data in the fragment shader is handled correctly. 
; RUN: amdllpc -v -gfxip 10.1.0 -enable-relocatable-shader-elf %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert index 0db254f255..8513469a20 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs1.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // This is not the main test, just to make sure that the shader is valid. // The real check will be included in the used pipelines. diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert index 9496d95915..b3a64dcb4d 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/Vs2.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST // This is not the main test, just to make sure that the shader is valid. // The real check will be included in the used pipelines. 
diff --git a/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag index 8091133898..89d0db6fc5 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestArray_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location= 0) in vec4 input1; diff --git a/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag b/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag index 47c68c161e..aee18962a4 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestElementReuse.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct Struct { diff --git a/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag index f50c5f5b37..46917fecca 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert index 8b88a9d9b2..f97df8cd53 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantArraySize_lit.vert @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(constant_id = 1) const int SIZE = 6; diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert index e8b04ee082..928854cf5a 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecConstantOp_lit.vert @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(constant_id = 200) const float f1 = 3.1415926; layout(constant_id = 201) const int i1 = -10; diff --git a/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag index 44f723c277..a958775c86 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestSpecialSourceSwizzle_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 fragColor; diff --git a/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag b/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag index 6c69ef0a68..416958cd86 100644 --- a/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjConstant_TestStruct_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct DATA { diff --git a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag index d3414c42ce..eb100b51fd 100644 --- a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag +++ b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_AMD_shader_fragment_mask: enable diff --git a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp index 4dfc678e43..4bccefbb27 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp +++ b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 440 precision highp uimageCubeArray; diff --git a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag index c785eb1568..4d16acf021 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(set = 0, binding = 0, rgba32f) coherent readonly uniform image1D img1D; layout(set = 0, binding = 1, rgba32f) restrict uniform image2D img2D; diff --git a/llpc/test/shaderdb/object/ObjInput_TestCsBuiltIn_lit.comp b/llpc/test/shaderdb/object/ObjInput_TestCsBuiltIn_lit.comp index ca6accd956..655f61687b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestCsBuiltIn_lit.comp +++ b/llpc/test/shaderdb/object/ObjInput_TestCsBuiltIn_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 layout(binding = 0, std430) buffer Buffer diff --git a/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert index 878bb35ec9..4031d8133d 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestDrawParams_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_ARB_shader_draw_parameters: enable diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag index 20f3645f88..f0ac07778f 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 2) in vec4 f4; layout(location = 5) flat in int i1; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag index 0685031f7d..119572c8c6 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsBuiltIn_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec4 f4; #extension GL_EXT_multiview : enable diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag index c1e25e8e31..9281505450 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0, component = 1) in vec2 f2; layout(location = 0, component = 3) in float f1; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag index 45ac7423ec..9caf08e7d2 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 2) flat in dvec4 d4; layout(location = 4) flat in dvec3 d3[2]; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag index d3ded8d6aa..64b4633003 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInBlock_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 2) in Block { diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag index 32850f194b..15b21e10cf 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierInBlock_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 1) in Block { diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag index 9ad6580c38..64de19aa38 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifierOnStruct_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag index a07fbf5cfe..72971ee743 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsInterpQualifier_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 1) smooth in float f1; layout(location = 2) flat in float f2; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag index 9186c2da12..a8199016d5 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrixArray_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 2) flat in mat4 m4[2]; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag index 91f6eeabb7..8d71331f9d 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 2) flat in mat4 m4; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm b/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm index 971095bdb8..fe184d361e 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestFsNonVolatileHelperInvocation.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that a load from BuiltIn HelperInvocation without volatile decoration is ; correctly lowered to builtin.HelperInvocation. 
diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag index 8fc0904485..85461e2cd2 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsStruct_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag index 8fbb8e2834..d291084a0a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsVectorArray_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 2) flat in vec4 f4[2]; diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm b/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm index b23f91cf08..eef988cc73 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestFsVolatileHelperInvocation.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that a load from BuiltIn HelperInvocation without volatile decoration, ; but with load marked as Volatile, is correctly lowered to an is.helper.invocation. diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom index edef171228..57b857127b 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsBasic_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 16) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom index 34bf02692e..5be0569033 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsBuiltIn_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 16) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom index 94f104869e..fde8c71bee 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom +++ b/llpc/test/shaderdb/object/ObjInput_TestGsCompSpecifier_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + layout(triangles) in; layout(triangle_strip, max_vertices = 4) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag index 929e241c0b..49740b37ba 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestIndexingInterpOfInputArray_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function _amdgpu_ps_main // RUN: amdllpc --print-after=lgc-set-up-target-features %s 2>&1 | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc index c9cb5db6c0..41e42a192a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsBasic_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc index 60fd1a805d..bb1353b3b1 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsBuiltIn_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc index 950ea50b9f..6117cac882 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsCompSpecifier_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm index f76d860e66..ad8c75faec 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsLoadEntireInputArray_lit.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm b/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm index fead961269..d5709533e1 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm +++ b/llpc/test/shaderdb/object/ObjInput_TestTcsViewIndex.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} LGC before-lowering results diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese index 2e14480abf..2bb122babe 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesBasic_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese index 95d6eaed45..8bb5d99a78 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesBuiltIn_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese index 00913c5eda..ee730094c3 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesCompSpecifier_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese index 2e1320f9ce..0df785ec09 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexInBlock_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese index ebd07b33e2..95bc59ef4c 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesComplexPatchInBlock_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese b/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese index d7fa9c6056..6a1260df07 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese +++ b/llpc/test/shaderdb/object/ObjInput_TestTesConstExpr_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjInput_TestUnUsedVariable_lit.comp b/llpc/test/shaderdb/object/ObjInput_TestUnUsedVariable_lit.comp index 947a38dbe3..0e53605c1d 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestUnUsedVariable_lit.comp +++ b/llpc/test/shaderdb/object/ObjInput_TestUnUsedVariable_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 core layout (set=0, binding=0) buffer MyBuffer { diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert index b230d1ec5b..ccea90ad2c 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsBasic_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0) in vec4 f4; layout(location = 1) in int i1; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert index 4be495798b..4edf368ee6 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsBuiltIn_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out int i1; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert index 7ba7cb4e66..8dccc61122 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsCompSpecifier_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 0, component = 0) in float f1; layout(location = 0, component = 1) in vec2 f2; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert index ec7546f841..f1f55d3ac5 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsDouble_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 2) in dvec4 d4; layout(location = 4) in dvec3 d3[2]; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert b/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert index daa86c79c3..c8c09ca6f1 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsMatrix.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 2) in mat4 m4; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert b/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert index bb593632bf..1f0a675973 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsMatrixArray.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 2) in mat4 m4[2]; diff --git a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert index 7493a50e18..0cdfeac69a 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjInput_TestVsVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 2) in vec4 f4[2]; diff --git a/llpc/test/shaderdb/object/ObjNonUniformIndex_TestLoadRowMajor.comp b/llpc/test/shaderdb/object/ObjNonUniformIndex_TestLoadRowMajor.comp index aa1eadf06f..cd39d269da 100644 --- a/llpc/test/shaderdb/object/ObjNonUniformIndex_TestLoadRowMajor.comp +++ b/llpc/test/shaderdb/object/ObjNonUniformIndex_TestLoadRowMajor.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_EXT_nonuniform_qualifier : enable diff --git a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag index 78f0ae2433..2c7455aa27 100644 --- a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag +++ b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + #extension GL_EXT_nonuniform_qualifier : require layout(set=0,binding=0) uniform sampler2D samp2Ds[]; layout(set=0,binding=1) uniform sampler2D samp2D; diff --git a/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm b/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm index b5a032473f..6a2522dbc1 100644 --- a/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm +++ b/llpc/test/shaderdb/object/ObjNonUniform_TestMinNonUniform.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} FE lowering results diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag index 421d75d255..54b2d35e2a 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in flat int i0; layout(location = 1) in float i1; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag index 59b768c19f..dc91ddb7fe 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsBuiltIn_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_shader_stencil_export: enable diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag index 1b95487d4f..bc5e119bcc 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsCompSpecifier_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in vec3 f3; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag index a80a0329d4..a7d772c204 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsNoOut.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + void main() { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag b/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag index 942189bae5..49a3950b47 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestFsVector_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in flat ivec3 i0; layout(location = 1) in vec3 i1; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom index b26fcbe1f7..c8536c30a4 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsBasic_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 16) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom index 45990561aa..9fed0f65bb 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsBuiltIn_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + layout(triangles) in; layout(triangle_strip, max_vertices = 16) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom b/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom index 38d4e9cf71..e594e776b6 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom +++ b/llpc/test/shaderdb/object/ObjOutput_TestGsCompSpecifier_lit.geom @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; layout(triangle_strip, max_vertices = 4) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag b/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag index 6fb48dc19d..f721e89d86 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestLlpcOpt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + void main() { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag b/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag index 29aeb46e2a..5f482c396a 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag +++ b/llpc/test/shaderdb/object/ObjOutput_TestOpt.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + void main() { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc index 07380a86a4..15964efe1b 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsBasic_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc index 411a37a678..e144ee2991 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsBuiltIn_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc index ef9a35f460..b5c0f705dc 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsCompSpecifier_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc index 61a9aa5ece..b7b1cae4d6 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexOutBlock_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + layout(vertices = 5) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc index 2925ac0944..ce3c2e54bb 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsComplexPatchOutBlock_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 5) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc index bf67f2bf39..c6b3e00ce9 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc +++ b/llpc/test/shaderdb/object/ObjOutput_TestTcsConstExpr_lit.tesc @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(vertices = 3) out; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese index 5ccb4c8091..3e15ea9682 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesBasic_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese index 9ce5e3ef65..a516d203c5 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesBuiltIn_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #extension GL_ARB_shader_viewport_layer_array: enable diff --git a/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese b/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese index 369eccd404..57e615e0f1 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese +++ b/llpc/test/shaderdb/object/ObjOutput_TestTesCompSpecifier_lit.tese @@ -1,4 +1,6 @@ #version 450 core +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + layout(triangles) in; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert index f96d14f954..d85f3931f0 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsBasic_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) out vec3 f3; layout(location = 1) out int i1; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert index 6b5684005c..1bd691a153 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsBuiltIn_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #extension GL_ARB_shader_viewport_layer_array: enable diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert index 6c64f48e5a..9c19a1bfd1 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsCompSpecifier_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0, component = 0) out float f1; layout(location = 0, component = 2) out vec2 f2; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert index 61d01df9fe..2984e7e6dd 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsDouble_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 1) out dvec4 d4; layout(location = 3) out dvec3 d3[2]; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert index 0c8274ea8a..028d11a958 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrix.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 3) out mat4 m4; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert index 31b9f6c074..d665c36dcf 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsMatrixArray.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(location = 3) out mat4 m4[2]; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert index cf1d90ae8b..3e983de739 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsNoBuiltIn_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 1) in vec4 f4; layout(location = 2) out float f1; diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert index 35171f0dd1..77a2b7eb11 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsNoGeneric_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + void main() { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert index 7f2300c7d9..f46f62df67 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsOutBlock_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 3) out Block { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert index b3be3b6f9d..2ebc71674e 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert index 616db685f3..0be3d5e3fd 100644 --- a/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjOutput_TestVsVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(location = 0) in float f1; layout(location = 3) out vec4 f4[2]; diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert index 65dd382178..ec9067231d 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestBasic_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(push_constant) uniform PCB { diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert index 2039876a88..30fc4ee97b 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestNestedStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct Str { diff --git a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert index f42bf0d371..bcfb946d21 100644 --- a/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert +++ b/llpc/test/shaderdb/object/ObjPushConst_TestSpillToMemory_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(push_constant) uniform PCB { diff --git a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag index 290851a55a..b900f63a72 100644 --- a/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag +++ b/llpc/test/shaderdb/object/ObjPushConstant_TestBasic_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, push_constant) uniform PushConstant { diff --git a/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm b/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm index 4e7e077bea..01b9b65feb 100644 --- a/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm +++ b/llpc/test/shaderdb/object/ObjPushConstant_TestMultiPushConstant_lit.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag index bedeefb8b3..2f88741339 100644 --- a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag +++ b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout (std140, binding = 1) uniform BB1 { diff --git a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag index 1520c02b44..0fde0ca6c8 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(set=0, binding=0) uniform sampler2D s0; layout(set=0, binding=1) uniform sampler2D s1; diff --git a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag index 4900dc339b..4b41ad0f0c 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(set = 0, binding = 0) uniform texture2D tex2D; layout(set = 0, binding = 1) uniform samplerShadow sampShadow; diff --git a/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp b/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp index 9108068f81..1fee5971a3 100644 --- a/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp +++ b/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 #extension GL_ARB_gpu_shader_int64: enable diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp index 94c6c8aa7b..ce46267517 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 layout (local_size_x = 16, local_size_y = 16) in; diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp index 2623ea3276..ed136b10af 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 layout (local_size_x = 16, local_size_y = 16) in; diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestBasic_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestBasic_lit.comp index 7035cb53bc..be611d417c 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestBasic_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestBasic_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 layout(binding = 0) uniform Uniforms diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestMatrix_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestMatrix_lit.comp index 1781e77336..88d2cb268f 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestMatrix_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestMatrix_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 layout(binding = 0) uniform Uniforms diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestStruct_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestStruct_lit.comp index fe3a5631eb..f38cabe534 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestStruct_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestStruct_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + #version 450 layout (local_size_x = 16, local_size_y = 16) in; diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag index cdb2575c49..b4c191502d 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestAlign_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct str { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag index b18ccf05a1..3165a77d5e 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestDirectIndex_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, column_major, set = 0, binding = 1) buffer BufferObject { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag index 9357c6cfce..9d9ff7078c 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestDouble_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, column_major, set = 0, binding = 0) buffer BufferObject { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag index 301e47b4ae..2d7429b348 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestIndirectIndex_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, column_major, set = 0, binding = 0) buffer BufferObject { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert index 9e5219fb51..2dfe798b45 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMatrixInStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct PosAttrib { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt16.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt16.comp index b45c8aa596..a9c3c1c2cf 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt16.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt16.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int16: enable diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt32.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt32.comp index b88d64b72e..09cb245877 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt32.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt32.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #define SIZE 65536 diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt8.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt8.comp index 6d3cd23ffa..b3ec121bc6 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt8.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemCpyInt8.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt16.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt16.comp index 8558704ab3..f53ac0f5cd 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt16.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt16.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int16: enable diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt32.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt32.comp index b4c3a0f408..7f4301f4a2 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt32.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt32.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #define SIZE 65536 diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt8.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt8.comp index d3edfd2970..f7b5797722 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt8.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemSetInt8.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 #extension GL_EXT_shader_explicit_arithmetic_types_int8: enable diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag index 79c71df2f0..dd7631d689 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMemoryQualifier_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(set = 1, binding = 0) coherent buffer Buffer { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert index 9cdbf97e20..c8eda55ef1 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag index 3113847140..72b9190cb9 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestOffset_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct str { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag index c94006f2ca..1e6593dce5 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestRowMajor_lit.frag @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py #version 450 diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert index 94ef5423e1..e9954c075e 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestRuntimeArray_lit.vert @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py // RUN: amdllpc --print-after=lower-translator -filetype=asm -o - 2>&1 %s | FileCheck -check-prefixes=SHADERTEST %s #version 450 core diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert index 2182dda624..854b695ee8 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicDouble_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert index 84501c040f..3f013c5f50 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicFloat_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert index 9d287f3fe2..d592bf402e 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicInt_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert index 278933a082..4b61e9ac96 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreBasicUint_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert index 6ee39dd6d1..d5e16288c1 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrixArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert index ea1c923268..47b6655f1d 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMatrix_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag index 6ec89741ef..ad58d4e4a7 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreMixedMatrixStyle_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0, column_major) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag index 632fcefea6..8c890cfb6b 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreRowMajorMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0, row_major) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert index 94ac80c6c9..eaca7cdb1d 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreScalarArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert index 32af0321bd..17b0d06cdd 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert index efcae99365..542e4fe013 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrixArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert index 0714115f0f..9d28d89511 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToMatrix_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag index 613ce955c8..bbc85d7998 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToRowMajorMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std430, binding = 0, row_major) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert index 73ff4a56bc..25d065aa7e 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreToScalarVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm index da9b000577..db4d8820fb 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert index de7cde28aa..8ff3e9e287 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestStoreVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std430, binding = 0) buffer Block { diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag b/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag index 05bdf14bfd..f9dcccf2a3 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestUseStorageBuffer_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + #pragma use_storage_buffer diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestVectorComponentStore_lit.comp b/llpc/test/shaderdb/object/ObjStorageBlock_TestVectorComponentStore_lit.comp index 5d011113a3..3854d40989 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestVectorComponentStore_lit.comp +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestVectorComponentStore_lit.comp @@ -1,3 +1,28 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + #version 450 layout(set = 0, binding = 0) buffer BO { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag index 4c3d87dcde..36b4671e78 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestAlign_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct str { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag index cfddbcec6d..7b31c452db 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(set = 1, binding = 0) uniform BB { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag index 96af207de5..fd900d5774 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestIndirectIndex_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(set = 0, binding = 1) uniform BB { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert index ece92ad7cb..e3f593e46a 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicDouble_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert index 8a0a2f5242..e94063a772 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicFloat_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert index 7d79645cb5..f0f2c6996b 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicInt_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert index 98949a0fd5..a9295b832c 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadBasicUint_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert index c980b409e7..93f2702cf5 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrixArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert index 4ff4e5a72a..9152917971 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromMatrix_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag index 82c8cea514..4ead173d58 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromRowMajorMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0, row_major) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert index 30251dc053..18a98163d3 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadFromScalarVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert index 9207cc6667..63d51bfeca 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert index 7edfacd88f..375fafb545 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrix_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag index 78dcb969d2..3a85962e53 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMixedMatrixStyle_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0, column_major) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert index 7afc1434c5..7879cf7bc5 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadNestedStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + struct S0 { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag index 04582beb8e..36c342fcde 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadRowMajorMatrix_lit.frag @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(std140, binding = 0, row_major) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert index d5b0a22952..a5a58d951e 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadScalarArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert index 46151ae51e..4b1d1388d0 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadStruct_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct S { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert index 19e8016cde..013a2c43ad 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadVectorArray_lit.vert @@ -1,4 +1,11 @@ #version 450 core +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(std140, binding = 0) uniform Block { diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag index d411c25e5e..7d8afaf2ff 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestOffset_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + struct str { diff --git a/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag b/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag index cb38aa03ab..6dea238616 100644 --- a/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformConstant_TestArray_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag b/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag index 0d3d74dfef..279b698686 100644 --- a/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformConstant_TestSimple_lit.frag @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + layout(binding = 0) uniform Uniforms { diff --git a/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert b/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert index 653908e0e2..9dff922fda 100644 --- a/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert +++ b/llpc/test/shaderdb/object/ObjXfb_TestBasic_lit.vert @@ -1,4 +1,11 @@ #version 450 +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + layout(xfb_buffer = 0, xfb_offset = 16, xfb_stride = 32, location = 0) out vec4 output1; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe index aa60416fc8..0c979a0932 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; RUN: amdllpc -o - -gfxip 11.0.1 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s [Version] diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe index 146adb0a2c..07a0b7bfc2 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations.pipe @@ -1,13 +1,24 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the ray tracing continuations mode is working. ; Generating the instruction 'image_bvh64_intersect_ray' indicates the trace ray library is linked correctly. +; Among other things, make sure the !lgc.cps.maxArgumentVgprs metadata is the same for all modules in the pipeline. ; TODO: Change this to ISA / assembly output checks once the LLVM backend has settled ; RUN: amdllpc -gfxip 11.0 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s ; RUN: amdllpc -gfxip 11.0 -filetype=asm -add-rt-helpers 1 -o - %s | FileCheck -check-prefixes=ASM %s +; Main doesn't contain any CPS functions, so we don't emit the maxArgumentVgprs metadata. ; CHECK-LABEL: @_amdgpu_cs_main( ; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. 
+; CHECK-NOT: !lgc.cps.maxArgumentVgprs ; ASM: {{^}}_amdgpu_cs_main: @@ -21,11 +32,17 @@ ; CHECK: unreachable ; CHECK: ret void +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_RGEN:![0-9]+]]} +; CHECK: [[ARG_BOUND_RGEN]] = !{i32 [[ARG_BOUND_VALUE:[0-9]+]]} + ; ASM: {{^}}_rgen_1.resume.0: ; CHECK-LABEL: @_chit_2( ; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_CHIT:![0-9]+]]} +; CHECK: [[ARG_BOUND_CHIT]] = !{i32 [[ARG_BOUND_VALUE]]} + ; ASM: {{^}}_chit_2: ; CHECK-LABEL: @_cs_( @@ -34,6 +51,9 @@ ; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. ; CHECK-NOT: ret void +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_CS:![0-9]+]]} +; CHECK: [[ARG_BOUND_CS]] = !{i32 [[ARG_BOUND_VALUE]]} + ; ASM: {{^}}_cs_: ; ASM: image_bvh64_intersect_ray diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe index dc0b9e2af0..5709725642 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_IntersectionShaderVgprCount.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the payload register count for intersection shader is reasonable ; RUN: amdllpc -gfxip=11.0 --report-payload-register-sizes=max %s 2>&1 | FileCheck -check-prefix=CHECK-FROM-LIB %s diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe index a4ada2ac99..6de60c0401 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that driver shader specialization (SDS) works on LLPC raytracing pipelines. ; ; This test consists of two files: diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe index c3957b33e0..af09e0fa67 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Continuations_SpecializeDriverShaders_Isa.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that driver shader specialization (SDS) works on LLPC raytracing pipelines. ; ; This test consists of two files: diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe new file mode 100644 index 0000000000..10decef4eb --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_DifferentPayloads.pipe @@ -0,0 +1,166 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; Check that we can have a shader with a really small payload and one with a really large payload. +; Both should have the same value for !lgc.cps.maxArgumentVgprs. + +; RUN: amdllpc -gfxip 11.0 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s + +; Main doesn't contain any CPS functions, so we won't set maxArgumentVgprs. +; CHECK-LABEL: @_amdgpu_cs_main( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. +; CHECK-NOT: !lgc.cps.maxArgumentVgprs + +; CHECK-LABEL: @_rgen_1( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK-LABEL: @_rgen_1.resume.0( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_RGEN:![0-9]+]]} +; CHECK: [[ARG_BOUND_RGEN]] = !{i32 70} + +; CHECK-LABEL: @_chit_2( +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. + +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_CHIT:![0-9]+]]} +; CHECK: [[ARG_BOUND_CHIT]] = !{i32 70} + +; CHECK-LABEL: @_cs_( +; CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray. +; CHECK-NOT: ret void +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. 
+; CHECK-NOT: ret void + +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND_CS:![0-9]+]]} +; CHECK: [[ARG_BOUND_CS]] = !{i32 70} + +[Version] +version = 69 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + float small; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(location = 14) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 14); + + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(g_ray.small, 0, 0, 0)); +} + +[rgenInfo] +entryPoint = main + +[chitGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 large[100]; +}; + +layout(shaderRecordEXT, std430) buffer sbt { + float z; +}; + +hitAttributeEXT vec2 g_hit; +rayPayloadInEXT RayPayload g_ray; + +void main() { + g_ray.large[43].xy = g_hit; + g_ray.large[71].z = z; +} + +[chitInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 0xffffffff +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[1].type = DescriptorImage +userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 8 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 +userDataNode[1].visibility = 0xffffffff +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 
+userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = DescriptorConstBufferCompact +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 2 +userDataNode[1].next[0].set = 0x0000005D +userDataNode[1].next[0].binding = 17 +userDataNode[1].next[1].type = DescriptorConstBuffer +userDataNode[1].next[1].offsetInDwords = 2 +userDataNode[1].next[1].sizeInDwords = 4 +userDataNode[1].next[1].set = 0x0000005D +userDataNode[1].next[1].binding = 0 +userDataNode[1].next[2].type = DescriptorBuffer +userDataNode[1].next[2].offsetInDwords = 6 +userDataNode[1].next[2].sizeInDwords = 4 +userDataNode[1].next[2].set = 0x0000005D +userDataNode[1].next[2].binding = 1 + +[RayTracingPipelineState] +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[1].closestHitShader = 1 +maxRecursionDepth = 1 +indirectStageMask = 0xffffffff +mode = 3 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2164261887 +rtState.nodeStrideShift = 7 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +payloadSizeMaxInLib = 12 +attributeSizeMaxInLib = 8 +hasPipelineLibrary = 1 diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe index 6d51383992..42c26dad85 
100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -gfxip 11.0 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s ; This case will have irreducible CFG after continuation transform. diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe new file mode 100644 index 0000000000..4068cf80af --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_NoPayload.pipe @@ -0,0 +1,194 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +; RUN: amdllpc -gfxip 11.0 -o - -llpc-raytracing-mode=continuations -emit-llvm %s | FileCheck -check-prefixes=CHECK %s + +; CHECK-LABEL: source_filename = "_rgen_1" +; CHECK: !lgc.cps.maxArgumentVgprs = !{[[ARG_BOUND:![0-9]+]]} +; CHECK: [[ARG_BOUND]] = !{i32 70} + +[Version] +version = 70 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +void main() +{ +} + +[rgenInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 16128 +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 8 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBufferCompact +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 2 +userDataNode[0].next[0].set = 0x0000005D +userDataNode[0].next[0].binding = 17 +userDataNode[0].next[0].strideInDwords = 0 +userDataNode[0].next[1].type = DescriptorConstBuffer +userDataNode[0].next[1].offsetInDwords = 2 +userDataNode[0].next[1].sizeInDwords = 4 +userDataNode[0].next[1].set = 0x0000005D +userDataNode[0].next[1].binding = 0 +userDataNode[0].next[1].strideInDwords = 0 +userDataNode[0].next[2].type = DescriptorBuffer +userDataNode[0].next[2].offsetInDwords = 6 +userDataNode[0].next[2].sizeInDwords = 4 +userDataNode[0].next[2].set = 0x0000005D +userDataNode[0].next[2].binding = 1 +userDataNode[0].next[2].strideInDwords = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = StreamOutTableVaPtr +userDataNode[1].offsetInDwords = 3 +userDataNode[1].sizeInDwords = 1 +userDataNode[2].visibility = 16128 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 7 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorImage +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 8 +userDataNode[2].next[0].set = 0x00000000 
+userDataNode[2].next[0].binding = 0 +userDataNode[2].next[0].strideInDwords = 0 +userDataNode[2].next[1].type = DescriptorConstBuffer +userDataNode[2].next[1].offsetInDwords = 8 +userDataNode[2].next[1].sizeInDwords = 4 +userDataNode[2].next[1].set = 0x00000000 +userDataNode[2].next[1].binding = 1 +userDataNode[2].next[1].strideInDwords = 0 + +[RayTracingPipelineState] +deviceIndex = 0 +options.includeDisassembly = 0 +options.scalarBlockLayout = 1 +options.resourceLayoutScheme = Compact +options.includeIr = 0 +options.robustBufferAccess = 0 +options.reconfigWorkgroupLayout = 0 +options.forceCsThreadIdSwizzling = 0 +options.overrideThreadGroupSizeX = 0 +options.overrideThreadGroupSizeY = 0 +options.overrideThreadGroupSizeZ = 0 +options.shadowDescriptorTableUsage = Disable +options.shadowDescriptorTablePtrHigh = 0 +options.extendedRobustness.robustBufferAccess = 0 +options.extendedRobustness.robustImageAccess = 1 +options.extendedRobustness.nullDescriptor = 0 +options.optimizeTessFactor = 1 +options.optimizationLevel = 2 +options.threadGroupSwizzleMode = Default +options.reverseThreadGroup = 0 +options.enableImplicitInvariantExports = 1 +options.internalRtShaders = 0 +options.forceNonUniformResourceIndexStageMask = 0 +options.replaceSetWithResourceType = 0 +options.disableSampleMask = 0 +options.buildResourcesDataForShaderModule = 0 +options.disableTruncCoordForGather = 1 +options.enableCombinedTexture = 0 +options.vertex64BitsAttribSingleLoc = 0 +options.enableFragColor = 0 +options.disableBaseVertex = 0 +options.enablePrimGeneratedQuery = 0 +options.disablePerCompFetch = 0 +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[1].generalShader = 3 +groups[1].closestHitShader = -1 +groups[1].anyHitShader = -1 +groups[1].intersectionShader = -1 +groups[2].type = 
VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[2].generalShader = -1 +groups[2].closestHitShader = 2 +groups[2].anyHitShader = 1 +groups[2].intersectionShader = -1 +maxRecursionDepth = 1 +indirectStageMask = 4294967295 +libraryMode = 1 +mode = 1 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2172650495 +rtState.nodeStrideShift = 7 +rtState.staticPipelineFlags = 512 +rtState.triCompressMode = 3 +rtState.pipelineFlags = 8192 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.boxSortHeuristicMode = 5 +rtState.counterMode = 0 +rtState.counterMask = 0 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.exportConfig.indirectCallingConvention = 1 +rtState.exportConfig.indirectCalleeSavedRegs.raygen = 2 +rtState.exportConfig.indirectCalleeSavedRegs.miss = 40 +rtState.exportConfig.indirectCalleeSavedRegs.closestHit = 50 +rtState.exportConfig.indirectCalleeSavedRegs.anyHit = 75 +rtState.exportConfig.indirectCalleeSavedRegs.intersection = 75 +rtState.exportConfig.indirectCalleeSavedRegs.callable = 28 +rtState.exportConfig.indirectCalleeSavedRegs.traceRays = 28 +rtState.exportConfig.enableUniformNoReturn = 1 +rtState.exportConfig.enableTraceRayArgsInLds = 0 +rtState.exportConfig.readsDispatchRaysIndex = 0 +rtState.exportConfig.enableDynamicLaunch = 0 +rtState.exportConfig.emitRaytracingShaderDataToken = 0 +rtState.enableRayQueryCsSwizzle = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.forceInvalidAccelStruct = 0 +rtState.enableRayTracingCounters = 0 +rtState.enableRayTracingHwTraversalStack = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 
+rtState.maxRayLength = 0 +rtState.enablePickClosestLaneResultForAbortRays = 0 +rtState.traceRayWaveDensityThreshold[8] = 1 +rtState.traceRayWaveDensityThreshold[10] = 1 +rtState.traceRayWaveDensityThreshold[11] = 1 +rtState.traceRayWaveDensityThreshold[12] = 1 +rtState.gpurtFeatureFlags = 0 +rtState.gpurtFuncTable.pFunc[0] = TraceRay2_0 +rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 +rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 +rtState.gpurtFuncTable.pFunc[3] = RayQueryProceed2_0 +rtState.gpurtFuncTable.pFunc[4] = GetInstanceIndex +rtState.gpurtFuncTable.pFunc[5] = GetInstanceID +rtState.gpurtFuncTable.pFunc[6] = GetObjectToWorldTransform +rtState.gpurtFuncTable.pFunc[7] = GetWorldToObjectTransform +rtState.gpurtFuncTable.pFunc[8] = GetRayQuery64BitInstanceNodePtr +rtState.gpurtFuncTable.pFunc[9] = TraceLongRayAMD2_0 +rtState.gpurtFuncTable.pFunc[10] = LongRayQueryProceedAMD2_0 +rtState.gpurtFuncTable.pFunc[11] = FetchTrianglePositionFromNodePointer +rtState.gpurtFuncTable.pFunc[12] = FetchTrianglePositionFromRayQuery +rtState.rtIpVersion = 2.0 +rtState.gpurtOverride = 0 +rtState.rtIpOverride = 0 +payloadSizeMaxInLib = 0 +attributeSizeMaxInLib = 0 +hasPipelineLibrary = 0 +pipelineLibStageMask = 0 diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe index 53a1af50cf..1950c9c6b2 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the ray tracing continuations mode option is set but the pipeline is still compiled in unified mode. ; This ensures pipeline will not get into indirect mode unexpectedly when it can be unified. diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe index f7919eced8..7f2d7d042b 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; RUN: amdllpc -gfxip 11.0 -o - -print-after=prepare-continuations -llpc-raytracing-mode=continuations -emit-lgc %s | FileCheck -check-prefixes=CHECK %s [Version] diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe index 29ff8c2c43..b4f9664954 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that traversal module is not compiled when compiling a pipeline library even it calls TraceRay ; RUN: amdllpc -gfxip 11.0 -o - -emit-lgc %s | FileCheck -check-prefixes=CHECK %s diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe index 240415501d..43eb7eadee 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestRtIgnoreDeclaredPayloadSize.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that only payloadSizeMaxInLib would be honored when rtIgnoreDeclaredPayloadSize is enable. ; BEGIN_SHADERTEST ; RUN: amdllpc %gfxip -emit-lgc %s -o - | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe index 488e6ac10a..702bc4fbd0 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the ray tracing static compilation is working. 
; Generating the instruction 'image_bvh64_intersect_ray' indicates the trace ray library is linked correctly. diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe index 7b7b575466..451d398fc2 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestWaveSize.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that the ray tracing pipeline is compiled in Wave32 mode (which is the only one we support so far). ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/ray_tracing/TestContState.rchit b/llpc/test/shaderdb/ray_tracing/TestContState.rchit index 6148e5763b..dfd2525334 100644 --- a/llpc/test/shaderdb/ray_tracing/TestContState.rchit +++ b/llpc/test/shaderdb/ray_tracing/TestContState.rchit @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Test that continuations state has a reasonable size. 
// NOTE: Hit attribute is extracted from system data, we want to make sure we only put necessary part into continuations diff --git a/llpc/test/shaderdb/ray_tracing/TestContState.rgen b/llpc/test/shaderdb/ray_tracing/TestContState.rgen index 3979e00d25..473c77e04b 100644 --- a/llpc/test/shaderdb/ray_tracing/TestContState.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestContState.rgen @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // Test that payload is not put into continuations state. // RUN: amdllpc %gfxip --llpc-raytracing-mode=continuations --report-cont-state-sizes %s 2>&1 | FileCheck -check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint b/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint index 1fe4112fc3..8bc18fd0e3 100644 --- a/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint +++ b/llpc/test/shaderdb/ray_tracing/TestHitAttribute.rint @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; Check that a hit attribute value that is not reported is never written out. 
diff --git a/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe index 4e266cabc9..6905984856 100644 --- a/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe +++ b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // BEGIN_SHADERTEST /* ; RUN: amdllpc --print-after=lgc-lower-gpurt %gfxip 2>&1 %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen b/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen index c0d7376e05..6613c0832b 100644 --- a/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestPayloadSizes.rgen @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // NOTE : Do not autogenerate // Check that we calculate payload size correctly using scalar alignment (requested by spec) diff --git a/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen index 9cc5a5e406..a007c84b90 100644 --- a/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen +++ b/llpc/test/shaderdb/ray_tracing/TestProcessGpuRtLibrary.rgen @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // RUN: amdllpc %gfxip --print-after=lower-gpurt-library 2>&1 %s | FileCheck -check-prefix=CHECK %s #version 460 #extension GL_EXT_ray_tracing : enable diff --git a/llpc/test/shaderdb/ray_tracing/lit.local.cfg b/llpc/test/shaderdb/ray_tracing/lit.local.cfg index 995b0dc96b..210a3fb2dc 100644 --- a/llpc/test/shaderdb/ray_tracing/lit.local.cfg +++ b/llpc/test/shaderdb/ray_tracing/lit.local.cfg @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + if "gpurt" not in config.available_features: config.unsupported = True diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rahit b/llpc/test/shaderdb/ray_tracing/standalone.rahit index cb57d2a5a3..4dad340daa 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rahit +++ b/llpc/test/shaderdb/ray_tracing/standalone.rahit @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rcall b/llpc/test/shaderdb/ray_tracing/standalone.rcall index 18800409ed..135a14bd47 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rcall +++ b/llpc/test/shaderdb/ray_tracing/standalone.rcall @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rchit b/llpc/test/shaderdb/ray_tracing/standalone.rchit index 13411728b0..f43ba71fb2 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rchit +++ b/llpc/test/shaderdb/ray_tracing/standalone.rchit @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rgen b/llpc/test/shaderdb/ray_tracing/standalone.rgen index e79bb7a37a..7c740b34da 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rgen +++ b/llpc/test/shaderdb/ray_tracing/standalone.rgen @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rint b/llpc/test/shaderdb/ray_tracing/standalone.rint index fff2c2e48c..b8f6a5e4e6 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rint +++ b/llpc/test/shaderdb/ray_tracing/standalone.rint @@ -1,3 +1,10 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + **********************************************************************************************************************/ + // BEGIN_SHADERTEST /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/ray_tracing/standalone.rmiss b/llpc/test/shaderdb/ray_tracing/standalone.rmiss index 6252aa03a7..ad4a31cbe4 100644 --- a/llpc/test/shaderdb/ray_tracing/standalone.rmiss +++ b/llpc/test/shaderdb/ray_tracing/standalone.rmiss @@ -13,6 +13,8 @@ */ // END_SHADERTEST +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #version 460 #extension GL_EXT_ray_tracing : enable diff --git a/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm b/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm index 0b4f8a8358..f42bce8124 100644 --- a/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/DescPtrSingleSelect.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that selecting between spill descriptor pointer and descriptor table descriptor pointer results in just a single scalar select instruction. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe index c54e135f63..ac7f9b1e83 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_PipelineCacheHit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that there is a pipeline cache hit when the same pipeline is compiled twice. 
; BEGIN_SHADERTEST ; RUN: amdllpc -shader-cache-mode=1 \ diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe index 90cda95785..94273b8090 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineCs_TestUnsupportedShader.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that a warning is printed when relocatable compilation is requested but not possible. ; Force this compute shader to use whole-pipeline compilation by using the ; `--relocatable-shader-elf-limit` dev flag. diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe index 89316ac4ef..81d68b3dc9 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineGs_BasicRelocGsTest.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that a pipeline with geometry shader can be compiled using relocatable shaders. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe index 2db702b63d..c81dbc7ca9 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineTess_RelocRemoveUnusedTcsOutputs.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the TCS output at location 0 is removed, and that the mapping of the TES inputs and TCS outputs match. ; BEGIN_SHADERTEST ; RUN: amdllpc \ diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe index 4413aad6d4..bd1be80685 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModeFlushToZero.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that the denormal mode is correctly set in the pal metadata. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe index dd01a246f3..cf1ffd5d3c 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_CheckFloatModePreserve.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that the denormal mode is correctly set in the pal metadata. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe index 593a9ede3b..d2d7eabefd 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ;RUN: amdllpc -v -gfxip 10.3.0 %s | FileCheck -check-prefix=SHADERTEST %s [Version] diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe index 3329fa71cb..31543b9db1 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_FillPsInput.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test checks that the PS_INPUT_CNTL entries are continuous. Tests fail ; when they do not start at 0 or there is a gap. diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe index 899c08ff56..d6857a1c76 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ImmutableSampler.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that relocatable shaders are disabled if immutable shaders // are present (descriptorRangeValue entry exists). 
diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe index a985ce7748..1024224031 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultiDwordPushConst.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that the offset to the push constant area is correct when there are is a multi-dword load. ; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe index 07e271fdc0..3a0f78100b 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_MultipleConstData.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that the elf linker places symbols for constant data at the correct offset with the correct size. 
; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s && llvm-objdump --triple=amdgcn --mcpu=gfx1010 -t %t.elf | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe index e14ccddc98..ba1de3beb1 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_PipelineCacheHit.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Test that there is a pipeline cache hit when the same pipeline is compiled twice. ; BEGIN_SHADERTEST ; RUN: amdllpc \ diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe index a1b21a6831..8f0d6c14ac 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocCheckPsInControl.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that the PS_IN_CONTROL has the correct value for GFX10. 
; BEGIN_SHADERTEST diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe index d39411ae14..fe347f1aeb 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_RelocMultiView.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; Test that the layer input is not output when multiview is enabled. ; For GFX10+, dummy generic output is no longer needed diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe index fbe7b5a731..750574a6a5 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTable.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that descriptor offset and descriptor buffer pointer relocation works // for buffer descriptors in a vs/fs pipeline. // Also check that the user data limit is set correctly. 
diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe index 841de00ea7..a8c486b783 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_ShadowDescTableMissingFmask.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // This test case checks that descriptor offset and descriptor buffer pointer relocation works // for buffer descriptors in a vs/fs pipeline. // Also check that the user data limit is set correctly. diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe index 395a479702..66b301c593 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableInOutMapping.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test checks that the mapping for the outputs of the vertex shader ; match the mapping for the inputs of the fragment shader. 
diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe index 7aa1fba073..c476e6e703 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_TestRelocatableSeparateCompilation.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test checks that LGC lowering is done for each shader separately. ; It shows that the shaders were compiled individually and then linked. diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe index 564f78aa37..15a064f169 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_BuiltinExportInPrologue.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // Test that when building relocatable shaders, that the color export shader contains exactly one export, which is for // the depth, and that the done flag is set. 
diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe index d21db4589f..61951a3f52 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsPs_PsInput.pipe @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; BEGIN_SHADERTEST ; RUN: amdllpc -enable-relocatable-shader-elf -o %t.elf %gfxip %s -v \ ; RUN: | FileCheck -check-prefix=SHADERTEST %s diff --git a/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm b/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm index c90212298f..46a19af8e4 100644 --- a/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/TriangleVs_CheckNoteSectionForCacheHash.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; This test case checks that the 128-bit cache hash is correctly added to the PAL metadata. 
; BEGIN_SHADERTEST ; RUN: amdllpc -gfxip=10.1 -unlinked -enable-relocatable-shader-elf -v -o %t.elf %s | FileCheck --check-prefix=CHECK %s diff --git a/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm b/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm index df481b674e..f367342bf5 100644 --- a/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm +++ b/llpc/test/shaderdb/relocatable_shaders/VsGs_Reloc.spvasm @@ -1,3 +1,11 @@ +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ; Check that we can compile Vertex and Geometry stages together, without having to provide a .pipe file. ; BEGIN_SHADERTEST_ST diff --git a/llpc/test/shaderdb/relocatable_shaders/lit.local.cfg b/llpc/test/shaderdb/relocatable_shaders/lit.local.cfg index 086522e9f9..9e4d44368f 100644 --- a/llpc/test/shaderdb/relocatable_shaders/lit.local.cfg +++ b/llpc/test/shaderdb/relocatable_shaders/lit.local.cfg @@ -1,3 +1,27 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### # overwrite %gfxip in config.substitutions config.gfxip = '-gfxip=10.1' diff --git a/llpc/test/shaderdb/xdl/lit.local.cfg b/llpc/test/shaderdb/xdl/lit.local.cfg new file mode 100644 index 0000000000..66598d52d9 --- /dev/null +++ b/llpc/test/shaderdb/xdl/lit.local.cfg @@ -0,0 +1,33 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +# overwrite %gfxip in config.substitutions +config.gfxip = '-gfxip=11.0' + +index = 0; +for substitution in config.substitutions : + if substitution[0] == '%gfxip' : + config.substitutions[index] = ('%gfxip', config.gfxip); + index += 1; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe new file mode 100644 index 0000000000..56a702c730 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/base_test.pipe @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; Test that the layer input is not output when multiview is enabled. 
+; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; 
SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_attributes.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/check_attributes.pipe.expected new file mode 100644 index 0000000000..e92f54c99c --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_attributes.pipe.expected @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by 
tool/update_llpc_test_checks.py UTC_ARGS: --check-attributes --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST: Function Attrs: alwaysinline nounwind memory(readwrite) +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], 
[[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST: Function Attrs: alwaysinline nounwind memory(readwrite) +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git 
a/llpc/test/tools/UpdateTestChecks/Inputs/check_globals.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals.pipe.expected new file mode 100644 index 0000000000..7ec83e2f65 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals.pipe.expected @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: 
[[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void 
@llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; +;. +; SHADERTEST: attributes #[[ATTR0:[0-9]+]] = { alwaysinline nounwind memory(readwrite) "amdgpu-memory-bound"="false" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" } +; SHADERTEST: attributes #[[ATTR1:[0-9]+]] = { alwaysinline nounwind memory(readwrite) "InitialPSInputAddr"="0" "amdgpu-color-export"="1" "amdgpu-depth-export"="0" "amdgpu-memory-bound"="false" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +; SHADERTEST: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; SHADERTEST: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +; SHADERTEST: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. 
+; SHADERTEST: [[META0:![0-9]+]] = !{!"Vulkan"} +; SHADERTEST: [[META1:![0-9]+]] = !{i32 -456748678, i32 696546207, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} +; SHADERTEST: [[META2:![0-9]+]] = !{i32 -1516779780, i32 289691743, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} +; SHADERTEST: [[META3:![0-9]+]] = !{i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 64, i32 32} +; SHADERTEST: [[META4:![0-9]+]] = !{!"\82\B0amdpal.pipelines{{.*}}amdpal.version\92\03\00"} +; SHADERTEST: [[META5:![0-9]+]] = !{i32 0} +; SHADERTEST: [[META6:![0-9]+]] = !{i32 1} +; SHADERTEST: [[META7]] = !{} +; SHADERTEST: [[META8:![0-9]+]] = !{i32 4} +; SHADERTEST: [[META9:![0-9]+]] = !{i32 6} +;. diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_all.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_all.pipe.expected new file mode 100644 index 0000000000..7ec83e2f65 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_all.pipe.expected @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), 
!invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; +;. 
+; SHADERTEST: attributes #[[ATTR0:[0-9]+]] = { alwaysinline nounwind memory(readwrite) "amdgpu-memory-bound"="false" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize32,+cumode,+enable-flat-scratch" } +; SHADERTEST: attributes #[[ATTR1:[0-9]+]] = { alwaysinline nounwind memory(readwrite) "InitialPSInputAddr"="0" "amdgpu-color-export"="1" "amdgpu-depth-export"="0" "amdgpu-memory-bound"="false" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +; SHADERTEST: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; SHADERTEST: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +; SHADERTEST: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. 
+; SHADERTEST: [[META0:![0-9]+]] = !{!"Vulkan"} +; SHADERTEST: [[META1:![0-9]+]] = !{i32 -456748678, i32 696546207, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} +; SHADERTEST: [[META2:![0-9]+]] = !{i32 -1516779780, i32 289691743, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800, i32 0, i32 0, i32 1} +; SHADERTEST: [[META3:![0-9]+]] = !{i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 64, i32 32} +; SHADERTEST: [[META4:![0-9]+]] = !{!"\82\B0amdpal.pipelines{{.*}}amdpal.version\92\03\00"} +; SHADERTEST: [[META5:![0-9]+]] = !{i32 0} +; SHADERTEST: [[META6:![0-9]+]] = !{i32 1} +; SHADERTEST: [[META7]] = !{} +; SHADERTEST: [[META8:![0-9]+]] = !{i32 4} +; SHADERTEST: [[META9:![0-9]+]] = !{i32 6} +;. diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_smart.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_smart.pipe.expected new file mode 100644 index 0000000000..ae4b3560a2 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_globals_smart.pipe.expected @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-globals smart --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), 
!invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; +;. +; SHADERTEST: [[META7]] = !{} +;. diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe new file mode 100644 index 0000000000..eb9dfb6bbc --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. 
+ +; RUN: amdllpc -enable-relocatable-shader-elf -filetype=asm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: amdgpu_vs_main: +; SHADERTEST: s_getpc_b64 s[4:5] +; SHADERTEST-NEXT: s_mov_b32 s0, s3 +; SHADERTEST-NEXT: s_mov_b32 s3, s5 +; SHADERTEST-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; SHADERTEST-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; SHADERTEST-NEXT: v_mov_b32_e32 v4, s1 +; SHADERTEST-NEXT: s_waitcnt lgkmcnt(0) +; SHADERTEST-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[4:7], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen +; SHADERTEST-NEXT: s_waitcnt vmcnt(0) +; SHADERTEST-NEXT: exp pos0 v0, v1, v2, v3 +; SHADERTEST-NEXT: exp pos1 off, off, v4, off done +; SHADERTEST-NEXT: s_endpgm +; +; SHADERTEST-LABEL: amdgpu_ps_main: +; SHADERTEST: v_mov_b32_e32 v0, 0 +; SHADERTEST-NEXT: exp mrt0 v0, v0, v0, v0 done compr vm +; SHADERTEST-NEXT: s_endpgm +; +; 
SHADERTEST-LABEL: .amdgpu_pal_metadata +; SHADERTEST-NEXT: --- +; SHADERTEST-NEXT: amdpal.pipelines: +; SHADERTEST-NEXT: - .api: Vulkan +; SHADERTEST-NEXT: .graphics_registers: +; SHADERTEST-NEXT: .aa_coverage_to_shader_select: InputCoverage +; SHADERTEST-NEXT: .cb_shader_mask: +; SHADERTEST-NEXT: .output0_enable: 0xf +; SHADERTEST-NEXT: .output1_enable: 0 +; SHADERTEST-NEXT: .output2_enable: 0 +; SHADERTEST-NEXT: .output3_enable: 0 +; SHADERTEST-NEXT: .output4_enable: 0 +; SHADERTEST-NEXT: .output5_enable: 0 +; SHADERTEST-NEXT: .output6_enable: 0 +; SHADERTEST-NEXT: .output7_enable: 0 +; SHADERTEST-NEXT: .db_shader_control: +; SHADERTEST-NEXT: .alpha_to_mask_disable: true +; SHADERTEST-NEXT: .conservative_z_export: 0 +; SHADERTEST-NEXT: .depth_before_shader: 0 +; SHADERTEST-NEXT: .exec_on_hier_fail: false +; SHADERTEST-NEXT: .exec_on_noop: false +; SHADERTEST-NEXT: .kill_enable: false +; SHADERTEST-NEXT: .mask_export_enable: false +; SHADERTEST-NEXT: .pre_shader_depth_coverage_enable: 0 +; SHADERTEST-NEXT: .primitive_ordered_pixel_shader: false +; SHADERTEST-NEXT: .stencil_test_val_export_enable: 0 +; SHADERTEST-NEXT: .z_export_enable: 0 +; SHADERTEST-NEXT: .z_order: 0x1 +; SHADERTEST-NEXT: .ia_multi_vgt_param: +; SHADERTEST-NEXT: .primgroup_size: 0x7f +; SHADERTEST-NEXT: .pa_cl_clip_cntl: +; SHADERTEST-NEXT: .dx_linear_attr_clip_ena: true +; SHADERTEST-NEXT: .rasterization_kill: false +; SHADERTEST-NEXT: .vte_vport_provoke_disable: false +; SHADERTEST-NEXT: .pa_cl_vs_out_cntl: +; SHADERTEST-NEXT: .use_vtx_point_size: false +; SHADERTEST-NEXT: .use_vtx_render_target_indx: true +; SHADERTEST-NEXT: .use_vtx_viewport_indx: false +; SHADERTEST-NEXT: .vs_out_misc_side_bus_ena: true +; SHADERTEST-NEXT: .vs_out_misc_vec_ena: true +; SHADERTEST-NEXT: .pa_cl_vte_cntl: +; SHADERTEST-NEXT: .vtx_w0_fmt: true +; SHADERTEST-NEXT: .x_offset_ena: true +; SHADERTEST-NEXT: .x_scale_ena: true +; SHADERTEST-NEXT: .y_offset_ena: true +; SHADERTEST-NEXT: .y_scale_ena: true +; 
SHADERTEST-NEXT: .z_offset_ena: true +; SHADERTEST-NEXT: .z_scale_ena: true +; SHADERTEST-NEXT: .pa_sc_shader_control: +; SHADERTEST-NEXT: .wave_break_region_size: 0 +; SHADERTEST-NEXT: .pa_su_vtx_cntl: +; SHADERTEST-NEXT: .pix_center: 0x1 +; SHADERTEST-NEXT: .quant_mode: 0x5 +; SHADERTEST-NEXT: .round_mode: 0x2 +; SHADERTEST-NEXT: .ps_extra_lds_size: 0 +; SHADERTEST-NEXT: .ps_iter_sample: false +; SHADERTEST-NEXT: .spi_baryc_cntl: +; SHADERTEST-NEXT: .front_face_all_bits: true +; SHADERTEST-NEXT: .pos_float_location: 0 +; SHADERTEST-NEXT: .spi_ps_in_control: +; SHADERTEST-NEXT: .num_interps: 0 +; SHADERTEST-NEXT: .num_prim_interp: 0 +; SHADERTEST-NEXT: .ps_w32_en: false +; SHADERTEST-NEXT: .spi_ps_input_addr: +; SHADERTEST-NEXT: .ancillary_ena: false +; SHADERTEST-NEXT: .front_face_ena: false +; SHADERTEST-NEXT: .line_stipple_tex_ena: false +; SHADERTEST-NEXT: .linear_center_ena: false +; SHADERTEST-NEXT: .linear_centroid_ena: false +; SHADERTEST-NEXT: .linear_sample_ena: false +; SHADERTEST-NEXT: .persp_center_ena: false +; SHADERTEST-NEXT: .persp_centroid_ena: false +; SHADERTEST-NEXT: .persp_pull_model_ena: false +; SHADERTEST-NEXT: .persp_sample_ena: true +; SHADERTEST-NEXT: .pos_fixed_pt_ena: false +; SHADERTEST-NEXT: .pos_w_float_ena: false +; SHADERTEST-NEXT: .pos_x_float_ena: false +; SHADERTEST-NEXT: .pos_y_float_ena: false +; SHADERTEST-NEXT: .pos_z_float_ena: false +; SHADERTEST-NEXT: .sample_coverage_ena: false +; SHADERTEST-NEXT: .spi_ps_input_cntl: +; SHADERTEST-NEXT: - .attr0_valid: 0 +; SHADERTEST-NEXT: .attr1_valid: 0 +; SHADERTEST-NEXT: .flat_shade: false +; SHADERTEST-NEXT: .fp16_interp_mode: false +; SHADERTEST-NEXT: .offset: 0 +; SHADERTEST-NEXT: .prim_attr: false +; SHADERTEST-NEXT: .pt_sprite_tex: false +; SHADERTEST-NEXT: .spi_ps_input_ena: +; SHADERTEST-NEXT: .ancillary_ena: false +; SHADERTEST-NEXT: .front_face_ena: false +; SHADERTEST-NEXT: .line_stipple_tex_ena: false +; SHADERTEST-NEXT: .linear_center_ena: false +; SHADERTEST-NEXT: 
.linear_centroid_ena: false +; SHADERTEST-NEXT: .linear_sample_ena: false +; SHADERTEST-NEXT: .persp_center_ena: false +; SHADERTEST-NEXT: .persp_centroid_ena: false +; SHADERTEST-NEXT: .persp_pull_model_ena: false +; SHADERTEST-NEXT: .persp_sample_ena: true +; SHADERTEST-NEXT: .pos_fixed_pt_ena: false +; SHADERTEST-NEXT: .pos_w_float_ena: false +; SHADERTEST-NEXT: .pos_x_float_ena: false +; SHADERTEST-NEXT: .pos_y_float_ena: false +; SHADERTEST-NEXT: .pos_z_float_ena: false +; SHADERTEST-NEXT: .sample_coverage_ena: false +; SHADERTEST-NEXT: .spi_shader_col_format: +; SHADERTEST-NEXT: .col_0_export_format: 0x4 +; SHADERTEST-NEXT: .col_1_export_format: 0 +; SHADERTEST-NEXT: .col_2_export_format: 0 +; SHADERTEST-NEXT: .col_3_export_format: 0 +; SHADERTEST-NEXT: .col_4_export_format: 0 +; SHADERTEST-NEXT: .col_5_export_format: 0 +; SHADERTEST-NEXT: .col_6_export_format: 0 +; SHADERTEST-NEXT: .col_7_export_format: 0 +; SHADERTEST-NEXT: .spi_shader_pos_format: +; SHADERTEST-NEXT: - 0x4 +; SHADERTEST-NEXT: - 0x4 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .spi_vs_out_config: +; SHADERTEST-NEXT: .no_pc_export: true +; SHADERTEST-NEXT: .vgt_reuse_off: false +; SHADERTEST-NEXT: .vgt_shader_stages_en: +; SHADERTEST-NEXT: .max_primgroup_in_wave: 0x2 +; SHADERTEST-NEXT: .vs_stage_en: 0 +; SHADERTEST-NEXT: .vs_w32_en: true +; SHADERTEST-NEXT: .vgt_strmout_buffer_config: +; SHADERTEST-NEXT: .stream_0_buffer_en: 0 +; SHADERTEST-NEXT: .stream_1_buffer_en: 0 +; SHADERTEST-NEXT: .stream_2_buffer_en: 0 +; SHADERTEST-NEXT: .stream_3_buffer_en: 0 +; SHADERTEST-NEXT: .vgt_strmout_config: +; SHADERTEST-NEXT: .streamout_0_en: false +; SHADERTEST-NEXT: .streamout_1_en: false +; SHADERTEST-NEXT: .streamout_2_en: false +; SHADERTEST-NEXT: .streamout_3_en: false +; SHADERTEST-NEXT: .vs_so_base0_en: false +; SHADERTEST-NEXT: .vs_so_base1_en: false +; SHADERTEST-NEXT: .vs_so_base2_en: false +; SHADERTEST-NEXT: .vs_so_base3_en: false +; 
SHADERTEST-NEXT: .vs_streamout_en: false +; SHADERTEST-NEXT: .hardware_stages: +; SHADERTEST-NEXT: .ps: +; SHADERTEST-NEXT: .checksum_value: 0xb4d39ea3 +; SHADERTEST-NEXT: .debug_mode: false +; SHADERTEST-NEXT: .entry_point: _amdgpu_ps_main +; SHADERTEST-NEXT: .float_mode: 0xc0 +; SHADERTEST-NEXT: .ieee_mode: false +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true +; SHADERTEST-NEXT: .scratch_en: false +; SHADERTEST-NEXT: .scratch_memory_size: 0 +; SHADERTEST-NEXT: .sgpr_count: 0x2 +; SHADERTEST-NEXT: .sgpr_limit: 0x6a +; SHADERTEST-NEXT: .trap_present: 0 +; SHADERTEST-NEXT: .user_data_reg_map: +; SHADERTEST-NEXT: - 0x10000000 +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: .user_sgprs: 0x1 +; SHADERTEST-NEXT: .uses_uavs: false +; SHADERTEST-NEXT: .vgpr_count: 0x2 +; SHADERTEST-NEXT: .vgpr_limit: 0x100 +; SHADERTEST-NEXT: .wavefront_size: 0x40 +; SHADERTEST-NEXT: .wgp_mode: false +; SHADERTEST-NEXT: .writes_depth: 0 +; SHADERTEST-NEXT: .writes_uavs: false +; 
SHADERTEST-NEXT: .vs: +; SHADERTEST-NEXT: .checksum_value: 0xcd42e2e5 +; SHADERTEST-NEXT: .debug_mode: false +; SHADERTEST-NEXT: .entry_point: _amdgpu_vs_main +; SHADERTEST-NEXT: .float_mode: 0xc0 +; SHADERTEST-NEXT: .ieee_mode: false +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true +; SHADERTEST-NEXT: .scratch_en: false +; SHADERTEST-NEXT: .scratch_memory_size: 0 +; SHADERTEST-NEXT: .sgpr_count: 0x8 +; SHADERTEST-NEXT: .sgpr_limit: 0x6a +; SHADERTEST-NEXT: .trap_present: 0 +; SHADERTEST-NEXT: .user_data_reg_map: +; SHADERTEST-NEXT: - 0x10000000 +; SHADERTEST-NEXT: - 0x1000000b +; SHADERTEST-NEXT: - 0x1000000f +; SHADERTEST-NEXT: - 0x10000003 +; SHADERTEST-NEXT: - 0x10000004 +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: .user_sgprs: 0x5 +; SHADERTEST-NEXT: .vgpr_count: 0x5 +; SHADERTEST-NEXT: .vgpr_limit: 0x100 +; SHADERTEST-NEXT: .wavefront_size: 0x20 +; SHADERTEST-NEXT: .wgp_mode: false +; SHADERTEST-NEXT: .internal_pipeline_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: .num_interpolants: 0x1 +; SHADERTEST-NEXT: .registers: {} +; 
SHADERTEST-NEXT: .shaders: +; SHADERTEST-NEXT: .pixel: +; SHADERTEST-NEXT: .api_shader_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .hardware_mapping: +; SHADERTEST-NEXT: - .ps +; SHADERTEST-NEXT: .vertex: +; SHADERTEST-NEXT: .api_shader_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .hardware_mapping: +; SHADERTEST-NEXT: - .vs +; SHADERTEST-NEXT: .spill_threshold: 0xffff +; SHADERTEST-NEXT: .streamout_vertex_strides: +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .type: VsPs +; SHADERTEST-NEXT: .user_data_limit: 0x1 +; SHADERTEST-NEXT: .xgl_cache_info: +; SHADERTEST-NEXT: .128_bit_cache_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: .llpc_version: {{.*}} +; SHADERTEST-NEXT: amdpal.version: +; SHADERTEST-NEXT: - 0x3 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: ... diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe.expected new file mode 100644 index 0000000000..eb9dfb6bbc --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/check_pal_metadata.pipe.expected @@ -0,0 +1,365 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --check-pal-metadata +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -filetype=asm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: amdgpu_vs_main: +; SHADERTEST: s_getpc_b64 s[4:5] +; SHADERTEST-NEXT: s_mov_b32 s0, s3 +; SHADERTEST-NEXT: s_mov_b32 s3, s5 +; SHADERTEST-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; SHADERTEST-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; SHADERTEST-NEXT: v_mov_b32_e32 v4, s1 +; SHADERTEST-NEXT: s_waitcnt lgkmcnt(0) +; SHADERTEST-NEXT: tbuffer_load_format_xyzw v[0:3], v0, s[4:7], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen +; SHADERTEST-NEXT: s_waitcnt vmcnt(0) +; SHADERTEST-NEXT: exp pos0 v0, v1, v2, v3 +; SHADERTEST-NEXT: exp pos1 off, off, v4, off done +; SHADERTEST-NEXT: s_endpgm +; +; SHADERTEST-LABEL: amdgpu_ps_main: +; SHADERTEST: v_mov_b32_e32 v0, 0 +; SHADERTEST-NEXT: exp mrt0 v0, v0, v0, v0 done compr vm +; SHADERTEST-NEXT: s_endpgm +; +; SHADERTEST-LABEL: .amdgpu_pal_metadata +; SHADERTEST-NEXT: --- +; SHADERTEST-NEXT: amdpal.pipelines: +; SHADERTEST-NEXT: - .api: Vulkan +; SHADERTEST-NEXT: .graphics_registers: +; SHADERTEST-NEXT: 
.aa_coverage_to_shader_select: InputCoverage +; SHADERTEST-NEXT: .cb_shader_mask: +; SHADERTEST-NEXT: .output0_enable: 0xf +; SHADERTEST-NEXT: .output1_enable: 0 +; SHADERTEST-NEXT: .output2_enable: 0 +; SHADERTEST-NEXT: .output3_enable: 0 +; SHADERTEST-NEXT: .output4_enable: 0 +; SHADERTEST-NEXT: .output5_enable: 0 +; SHADERTEST-NEXT: .output6_enable: 0 +; SHADERTEST-NEXT: .output7_enable: 0 +; SHADERTEST-NEXT: .db_shader_control: +; SHADERTEST-NEXT: .alpha_to_mask_disable: true +; SHADERTEST-NEXT: .conservative_z_export: 0 +; SHADERTEST-NEXT: .depth_before_shader: 0 +; SHADERTEST-NEXT: .exec_on_hier_fail: false +; SHADERTEST-NEXT: .exec_on_noop: false +; SHADERTEST-NEXT: .kill_enable: false +; SHADERTEST-NEXT: .mask_export_enable: false +; SHADERTEST-NEXT: .pre_shader_depth_coverage_enable: 0 +; SHADERTEST-NEXT: .primitive_ordered_pixel_shader: false +; SHADERTEST-NEXT: .stencil_test_val_export_enable: 0 +; SHADERTEST-NEXT: .z_export_enable: 0 +; SHADERTEST-NEXT: .z_order: 0x1 +; SHADERTEST-NEXT: .ia_multi_vgt_param: +; SHADERTEST-NEXT: .primgroup_size: 0x7f +; SHADERTEST-NEXT: .pa_cl_clip_cntl: +; SHADERTEST-NEXT: .dx_linear_attr_clip_ena: true +; SHADERTEST-NEXT: .rasterization_kill: false +; SHADERTEST-NEXT: .vte_vport_provoke_disable: false +; SHADERTEST-NEXT: .pa_cl_vs_out_cntl: +; SHADERTEST-NEXT: .use_vtx_point_size: false +; SHADERTEST-NEXT: .use_vtx_render_target_indx: true +; SHADERTEST-NEXT: .use_vtx_viewport_indx: false +; SHADERTEST-NEXT: .vs_out_misc_side_bus_ena: true +; SHADERTEST-NEXT: .vs_out_misc_vec_ena: true +; SHADERTEST-NEXT: .pa_cl_vte_cntl: +; SHADERTEST-NEXT: .vtx_w0_fmt: true +; SHADERTEST-NEXT: .x_offset_ena: true +; SHADERTEST-NEXT: .x_scale_ena: true +; SHADERTEST-NEXT: .y_offset_ena: true +; SHADERTEST-NEXT: .y_scale_ena: true +; SHADERTEST-NEXT: .z_offset_ena: true +; SHADERTEST-NEXT: .z_scale_ena: true +; SHADERTEST-NEXT: .pa_sc_shader_control: +; SHADERTEST-NEXT: .wave_break_region_size: 0 +; SHADERTEST-NEXT: .pa_su_vtx_cntl: +; 
SHADERTEST-NEXT: .pix_center: 0x1 +; SHADERTEST-NEXT: .quant_mode: 0x5 +; SHADERTEST-NEXT: .round_mode: 0x2 +; SHADERTEST-NEXT: .ps_extra_lds_size: 0 +; SHADERTEST-NEXT: .ps_iter_sample: false +; SHADERTEST-NEXT: .spi_baryc_cntl: +; SHADERTEST-NEXT: .front_face_all_bits: true +; SHADERTEST-NEXT: .pos_float_location: 0 +; SHADERTEST-NEXT: .spi_ps_in_control: +; SHADERTEST-NEXT: .num_interps: 0 +; SHADERTEST-NEXT: .num_prim_interp: 0 +; SHADERTEST-NEXT: .ps_w32_en: false +; SHADERTEST-NEXT: .spi_ps_input_addr: +; SHADERTEST-NEXT: .ancillary_ena: false +; SHADERTEST-NEXT: .front_face_ena: false +; SHADERTEST-NEXT: .line_stipple_tex_ena: false +; SHADERTEST-NEXT: .linear_center_ena: false +; SHADERTEST-NEXT: .linear_centroid_ena: false +; SHADERTEST-NEXT: .linear_sample_ena: false +; SHADERTEST-NEXT: .persp_center_ena: false +; SHADERTEST-NEXT: .persp_centroid_ena: false +; SHADERTEST-NEXT: .persp_pull_model_ena: false +; SHADERTEST-NEXT: .persp_sample_ena: true +; SHADERTEST-NEXT: .pos_fixed_pt_ena: false +; SHADERTEST-NEXT: .pos_w_float_ena: false +; SHADERTEST-NEXT: .pos_x_float_ena: false +; SHADERTEST-NEXT: .pos_y_float_ena: false +; SHADERTEST-NEXT: .pos_z_float_ena: false +; SHADERTEST-NEXT: .sample_coverage_ena: false +; SHADERTEST-NEXT: .spi_ps_input_cntl: +; SHADERTEST-NEXT: - .attr0_valid: 0 +; SHADERTEST-NEXT: .attr1_valid: 0 +; SHADERTEST-NEXT: .flat_shade: false +; SHADERTEST-NEXT: .fp16_interp_mode: false +; SHADERTEST-NEXT: .offset: 0 +; SHADERTEST-NEXT: .prim_attr: false +; SHADERTEST-NEXT: .pt_sprite_tex: false +; SHADERTEST-NEXT: .spi_ps_input_ena: +; SHADERTEST-NEXT: .ancillary_ena: false +; SHADERTEST-NEXT: .front_face_ena: false +; SHADERTEST-NEXT: .line_stipple_tex_ena: false +; SHADERTEST-NEXT: .linear_center_ena: false +; SHADERTEST-NEXT: .linear_centroid_ena: false +; SHADERTEST-NEXT: .linear_sample_ena: false +; SHADERTEST-NEXT: .persp_center_ena: false +; SHADERTEST-NEXT: .persp_centroid_ena: false +; SHADERTEST-NEXT: .persp_pull_model_ena: 
false +; SHADERTEST-NEXT: .persp_sample_ena: true +; SHADERTEST-NEXT: .pos_fixed_pt_ena: false +; SHADERTEST-NEXT: .pos_w_float_ena: false +; SHADERTEST-NEXT: .pos_x_float_ena: false +; SHADERTEST-NEXT: .pos_y_float_ena: false +; SHADERTEST-NEXT: .pos_z_float_ena: false +; SHADERTEST-NEXT: .sample_coverage_ena: false +; SHADERTEST-NEXT: .spi_shader_col_format: +; SHADERTEST-NEXT: .col_0_export_format: 0x4 +; SHADERTEST-NEXT: .col_1_export_format: 0 +; SHADERTEST-NEXT: .col_2_export_format: 0 +; SHADERTEST-NEXT: .col_3_export_format: 0 +; SHADERTEST-NEXT: .col_4_export_format: 0 +; SHADERTEST-NEXT: .col_5_export_format: 0 +; SHADERTEST-NEXT: .col_6_export_format: 0 +; SHADERTEST-NEXT: .col_7_export_format: 0 +; SHADERTEST-NEXT: .spi_shader_pos_format: +; SHADERTEST-NEXT: - 0x4 +; SHADERTEST-NEXT: - 0x4 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .spi_vs_out_config: +; SHADERTEST-NEXT: .no_pc_export: true +; SHADERTEST-NEXT: .vgt_reuse_off: false +; SHADERTEST-NEXT: .vgt_shader_stages_en: +; SHADERTEST-NEXT: .max_primgroup_in_wave: 0x2 +; SHADERTEST-NEXT: .vs_stage_en: 0 +; SHADERTEST-NEXT: .vs_w32_en: true +; SHADERTEST-NEXT: .vgt_strmout_buffer_config: +; SHADERTEST-NEXT: .stream_0_buffer_en: 0 +; SHADERTEST-NEXT: .stream_1_buffer_en: 0 +; SHADERTEST-NEXT: .stream_2_buffer_en: 0 +; SHADERTEST-NEXT: .stream_3_buffer_en: 0 +; SHADERTEST-NEXT: .vgt_strmout_config: +; SHADERTEST-NEXT: .streamout_0_en: false +; SHADERTEST-NEXT: .streamout_1_en: false +; SHADERTEST-NEXT: .streamout_2_en: false +; SHADERTEST-NEXT: .streamout_3_en: false +; SHADERTEST-NEXT: .vs_so_base0_en: false +; SHADERTEST-NEXT: .vs_so_base1_en: false +; SHADERTEST-NEXT: .vs_so_base2_en: false +; SHADERTEST-NEXT: .vs_so_base3_en: false +; SHADERTEST-NEXT: .vs_streamout_en: false +; SHADERTEST-NEXT: .hardware_stages: +; SHADERTEST-NEXT: .ps: +; SHADERTEST-NEXT: .checksum_value: 0xb4d39ea3 +; SHADERTEST-NEXT: .debug_mode: false +; SHADERTEST-NEXT: 
.entry_point: _amdgpu_ps_main +; SHADERTEST-NEXT: .float_mode: 0xc0 +; SHADERTEST-NEXT: .ieee_mode: false +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true +; SHADERTEST-NEXT: .scratch_en: false +; SHADERTEST-NEXT: .scratch_memory_size: 0 +; SHADERTEST-NEXT: .sgpr_count: 0x2 +; SHADERTEST-NEXT: .sgpr_limit: 0x6a +; SHADERTEST-NEXT: .trap_present: 0 +; SHADERTEST-NEXT: .user_data_reg_map: +; SHADERTEST-NEXT: - 0x10000000 +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: .user_sgprs: 0x1 +; SHADERTEST-NEXT: .uses_uavs: false +; SHADERTEST-NEXT: .vgpr_count: 0x2 +; SHADERTEST-NEXT: .vgpr_limit: 0x100 +; SHADERTEST-NEXT: .wavefront_size: 0x40 +; SHADERTEST-NEXT: .wgp_mode: false +; SHADERTEST-NEXT: .writes_depth: 0 +; SHADERTEST-NEXT: .writes_uavs: false +; SHADERTEST-NEXT: .vs: +; SHADERTEST-NEXT: .checksum_value: 0xcd42e2e5 +; SHADERTEST-NEXT: .debug_mode: false +; SHADERTEST-NEXT: .entry_point: _amdgpu_vs_main +; SHADERTEST-NEXT: .float_mode: 0xc0 +; SHADERTEST-NEXT: 
.ieee_mode: false +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true +; SHADERTEST-NEXT: .scratch_en: false +; SHADERTEST-NEXT: .scratch_memory_size: 0 +; SHADERTEST-NEXT: .sgpr_count: 0x8 +; SHADERTEST-NEXT: .sgpr_limit: 0x6a +; SHADERTEST-NEXT: .trap_present: 0 +; SHADERTEST-NEXT: .user_data_reg_map: +; SHADERTEST-NEXT: - 0x10000000 +; SHADERTEST-NEXT: - 0x1000000b +; SHADERTEST-NEXT: - 0x1000000f +; SHADERTEST-NEXT: - 0x10000003 +; SHADERTEST-NEXT: - 0x10000004 +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: - 0xffffffff +; SHADERTEST-NEXT: .user_sgprs: 0x5 +; SHADERTEST-NEXT: .vgpr_count: 0x5 +; SHADERTEST-NEXT: .vgpr_limit: 0x100 +; SHADERTEST-NEXT: .wavefront_size: 0x20 +; SHADERTEST-NEXT: .wgp_mode: false +; SHADERTEST-NEXT: .internal_pipeline_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: .num_interpolants: 0x1 +; SHADERTEST-NEXT: .registers: {} +; SHADERTEST-NEXT: .shaders: +; SHADERTEST-NEXT: .pixel: +; SHADERTEST-NEXT: .api_shader_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .hardware_mapping: +; SHADERTEST-NEXT: - .ps +; 
SHADERTEST-NEXT: .vertex: +; SHADERTEST-NEXT: .api_shader_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .hardware_mapping: +; SHADERTEST-NEXT: - .vs +; SHADERTEST-NEXT: .spill_threshold: 0xffff +; SHADERTEST-NEXT: .streamout_vertex_strides: +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: .type: VsPs +; SHADERTEST-NEXT: .user_data_limit: 0x1 +; SHADERTEST-NEXT: .xgl_cache_info: +; SHADERTEST-NEXT: .128_bit_cache_hash: +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: - 0x{{[0-9a-f]+}} +; SHADERTEST-NEXT: .llpc_version: {{.*}} +; SHADERTEST-NEXT: amdpal.version: +; SHADERTEST-NEXT: - 0x3 +; SHADERTEST-NEXT: - 0 +; SHADERTEST-NEXT: ... diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/function.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/function.pipe.expected new file mode 100644 index 0000000000..5ffd9b6431 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/function.pipe.expected @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function _amdgpu_ps_main +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/function_signature.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/function_signature.pipe.expected new file mode 100644 index 0000000000..cdc49ea432 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/function_signature.pipe.expected @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --function-signature --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. 
+ +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: define {{[^@]+}}@_amdgpu_vs_main +; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[VIEWID:%.*]], i32 inreg noundef [[VERTEXBUFFERTABLE:%.*]], i32 inreg noundef [[BASEVERTEX:%.*]], i32 inreg noundef [[BASEINSTANCE:%.*]], i32 noundef [[VERTEXID:%.*]], i32 noundef [[RELVERTEXID:%.*]], i32 noundef [[PRIMITIVEID:%.*]], i32 noundef [[INSTANCEID:%.*]]) #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META5:![0-9]+]] !lgc.shaderstage [[META6:![0-9]+]] { +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID]], [[BASEVERTEX]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext 
i32 [[VERTEXBUFFERTABLE]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: define {{[^@]+}}@_amdgpu_ps_main +; SHADERTEST-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[PRIMMASK:%.*]], <2 x float> noundef [[PERSPINTERPSAMPLE:%.*]], <2 x float> noundef [[PERSPINTERPCENTER:%.*]], <2 x float> noundef [[PERSPINTERPCENTROID:%.*]], <3 x float> 
noundef [[PERSPINTERPPULLMODE:%.*]], <2 x float> noundef [[LINEARINTERPSAMPLE:%.*]], <2 x float> noundef [[LINEARINTERPCENTER:%.*]], <2 x float> noundef [[LINEARINTERPCENTROID:%.*]], float noundef [[LINESTIPPLE:%.*]], float noundef [[FRAGCOORDX:%.*]], float noundef [[FRAGCOORDY:%.*]], float noundef [[FRAGCOORDZ:%.*]], float noundef [[FRAGCOORDW:%.*]], i32 noundef [[FRONTFACING:%.*]], i32 noundef [[ANCILLARY:%.*]], i32 noundef [[SAMPLECOVERAGE:%.*]], i32 noundef [[FIXEDXY:%.*]]) #[[ATTR1:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.base.lgc b/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.base.lgc new file mode 100644 index 0000000000..71e4e51268 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.base.lgc @@ -0,0 +1,94 @@ +; Test that invalid image descriptor patching is applied where required. + +; RUN: lgc -mcpu=gfx1010 -passes=lgc-apply-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX1010 %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; ModuleID = 'lgcPipeline' +source_filename = "lgcPipeline" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +target triple = "amdgcn--amdpal" + +; Function Attrs: nounwind +define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { +.entry: + %.desc.ptr2 = call <8 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 3) + %.desc.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc.ptr2 to i8 addrspace(4)* + %.desc.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc.ptr1, i64 0 + %.desc.ptr = bitcast i8 addrspace(4)* %.desc.ptr0 to <8 x i32> addrspace(4)* + %.sampler.ptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) + + %.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 1) + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <2 x i32> zeroinitializer) + + %.sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 1, <2 x float> zeroinitializer) + %.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) + + %.atomic = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> addrspace(4)* %.desc.ptr, i32 0, i32 1) #0 + + %.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, <2 x float> zeroinitializer) + + %.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 0) + %.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr) + + %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) ; just some source of divergence + %ofs = mul i32 %lane, 32 + + ; Use a waterfall loop with last.use to test that is also handled correctly + %.desc2.ptr2 = call <8 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 4) + %.desc2.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc2.ptr2 to i8 addrspace(4)* + %.desc2.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc2.ptr1, i32 %ofs + %.desc2.ptr = bitcast i8 addrspace(4)* %.desc2.ptr0 to <8 x i32> addrspace(4)* + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> addrspace(4)* %.desc2.ptr, i32 zeroinitializer) + + ret void +} + +declare <8 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v8i32(...) #1 +declare <4 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v4i32(...) #1 +declare <4 x float> @lgc.create.image.load.v4f32(...) #1 +declare void @lgc.create.image.store(...) #2 +declare <4 x float> @lgc.create.image.sample.v4f32(...) #1 +declare <4 x float> @lgc.create.image.gather.v4f32(...) #1 +declare i32 @lgc.create.image.atomic.i32(...) #0 +declare <2 x float> @lgc.create.image.get.lod.v2f32(...) #0 +declare <2 x i32> @lgc.create.image.query.size.v2i32(...) #0 +declare i32 @lgc.create.image.query.levels.i32(...) 
#0 + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind writeonly } + +!0 = !{i32 1} + +!lgc.unlinked = !{!0} +!lgc.user.data.nodes = !{!1,!2,!3,!4} + +!1 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 3} +!2 = !{!"DescriptorResource", i32 1, i32 0, i32 0, i32 8, i32 3, i32 3, i32 8} +!3 = !{!"DescriptorResource", i32 1, i32 0, i32 8, i32 8, i32 3, i32 4, i32 8} +!4 = !{!"DescriptorSampler", i32 2, i32 0, i32 16, i32 4, i32 0, i32 13, i32 4} diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.lgc.expected b/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.lgc.expected new file mode 100644 index 0000000000..d7a38b363b --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/generalize_calls.lgc.expected @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --version 5 --generalize-calls +; Test that invalid image descriptor patching is applied where required. + +; RUN: lgc -mcpu=gfx1010 -passes=lgc-apply-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX1010 %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +; ModuleID = 'lgcPipeline' +source_filename = "lgcPipeline" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +target triple = "amdgcn--amdpal" + +; Function Attrs: nounwind +define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { +.entry: + %.desc.ptr2 = call <8 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 3) + %.desc.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc.ptr2 to i8 addrspace(4)* + %.desc.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc.ptr1, i64 0 + %.desc.ptr = bitcast i8 addrspace(4)* %.desc.ptr0 to <8 x i32> addrspace(4)* + %.sampler.ptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) + + %.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 1) + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <2 x i32> zeroinitializer) + + %.sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 1, <2 x float> zeroinitializer) + %.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) + + %.atomic = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> addrspace(4)* %.desc.ptr, i32 0, i32 1) #0 + + %.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, <2 x float> zeroinitializer) + + %.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 0) + %.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr) + + %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) ; just some source of divergence + %ofs = mul i32 %lane, 32 + + ; Use a waterfall loop with last.use to test that is also handled correctly + %.desc2.ptr2 = call <8 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 4) + %.desc2.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc2.ptr2 to i8 addrspace(4)* + %.desc2.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc2.ptr1, i32 %ofs + %.desc2.ptr = bitcast i8 addrspace(4)* %.desc2.ptr0 to <8 x i32> addrspace(4)* + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> addrspace(4)* %.desc2.ptr, i32 zeroinitializer) + + ret void +} + +declare <8 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v8i32(...) #1 +declare <4 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v4i32(...) #1 +declare <4 x float> @lgc.create.image.load.v4f32(...) #1 +declare void @lgc.create.image.store(...) #2 +declare <4 x float> @lgc.create.image.sample.v4f32(...) #1 +declare <4 x float> @lgc.create.image.gather.v4f32(...) #1 +declare i32 @lgc.create.image.atomic.i32(...) #0 +declare <2 x float> @lgc.create.image.get.lod.v2f32(...) #0 +declare <2 x i32> @lgc.create.image.query.size.v2i32(...) #0 +declare i32 @lgc.create.image.query.levels.i32(...) #0 + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind writeonly } + +!0 = !{i32 1} + +!lgc.unlinked = !{!0} +!lgc.user.data.nodes = !{!1,!2,!3,!4} + +!1 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 3} +!2 = !{!"DescriptorResource", i32 1, i32 0, i32 0, i32 8, i32 3, i32 3, i32 8} +!3 = !{!"DescriptorResource", i32 1, i32 0, i32 8, i32 8, i32 3, i32 4, i32 8} +!4 = !{!"DescriptorSampler", i32 2, i32 0, i32 16, i32 4, i32 0, i32 13, i32 4} +; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META0:![0-9]+]] { +; CHECK-NEXT: [[_ENTRY:.*:]] +; CHECK-NEXT: [[DOTDESC_PTR2:%.*]] = call ptr addrspace(4) (...) 
@lgc.create.get.desc{{.*}}(i32 1, i32 1, i32 3, i32 3) +; CHECK-NEXT: [[DOTDESC_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR2]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTDESC_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC_PTR1]], i64 0 +; CHECK-NEXT: [[DOTDESC_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR0]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTSAMPLER_PTR:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc{{.*}}(i32 2, i32 2, i32 0, i32 13) +; CHECK-NEXT: [[DOTLOAD:%.*]] = call <4 x float> (...) @lgc.create.image.load{{.*}}(i32 0, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 1) +; CHECK-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[DOTSAMPLE:%.*]] = call <4 x float> (...) @lgc.create.image.sample{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 1, <2 x float> zeroinitializer) +; CHECK-NEXT: [[DOTGATHER:%.*]] = call <4 x float> (...) @lgc.create.image.gather{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) +; CHECK-NEXT: [[DOTATOMIC:%.*]] = call i32 (...) @lgc.create.image.atomic{{.*}}(i32 2, i32 0, i32 128, i32 0, ptr addrspace(4) [[DOTDESC_PTR]], i32 0, i32 1) #[[ATTR0]] +; CHECK-NEXT: [[DOTLOD:%.*]] = call <2 x float> (...) @lgc.create.image.get.lod{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[DOTQUERY_SIZE:%.*]] = call <2 x i32> (...) @lgc.create.image.query.size{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 0) +; CHECK-NEXT: [[DOTQUERY_LEVELS:%.*]] = call i32 (...) 
@lgc.create.image.query.levels{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]]) +; CHECK-NEXT: [[LANE:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; CHECK-NEXT: [[OFS:%.*]] = mul i32 [[LANE]], 32 +; CHECK-NEXT: [[DOTDESC2_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc{{.*}}(i32 1, i32 1, i32 3, i32 4) +; CHECK-NEXT: [[DOTDESC2_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR2]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTDESC2_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC2_PTR1]], i32 [[OFS]] +; CHECK-NEXT: [[DOTDESC2_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR0]] to ptr addrspace(4) +; CHECK-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, ptr addrspace(4) [[DOTDESC2_PTR]], i32 0) +; CHECK-NEXT: ret void +; +; +; GFX1010-LABEL: define dllexport spir_func void @lgc.shader.VS.main( +; GFX1010-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META0:![0-9]+]] { +; GFX1010-NEXT: [[_ENTRY:.*:]] +; GFX1010-NEXT: [[DOTDESC_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc{{.*}}(i32 1, i32 1, i32 3, i32 3) +; GFX1010-NEXT: [[DOTDESC_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR2]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTDESC_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC_PTR1]], i64 0 +; GFX1010-NEXT: [[DOTDESC_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR0]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTSAMPLER_PTR:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc{{.*}}(i32 2, i32 2, i32 0, i32 13) +; GFX1010-NEXT: [[DOTLOAD:%.*]] = call <4 x float> (...) @lgc.create.image.load{{.*}}(i32 0, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 1) +; GFX1010-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], <2 x i32> zeroinitializer) +; GFX1010-NEXT: [[DOTSAMPLE:%.*]] = call <4 x float> (...) 
@lgc.create.image.sample{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 1, <2 x float> zeroinitializer) +; GFX1010-NEXT: [[DOTGATHER:%.*]] = call <4 x float> (...) @lgc.create.image.gather{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) +; GFX1010-NEXT: [[DOTATOMIC:%.*]] = call i32 (...) @lgc.create.image.atomic{{.*}}(i32 2, i32 0, i32 128, i32 0, ptr addrspace(4) [[DOTDESC_PTR]], i32 0, i32 1) #[[ATTR0]] +; GFX1010-NEXT: [[DOTLOD:%.*]] = call <2 x float> (...) @lgc.create.image.get.lod{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], <2 x float> zeroinitializer) +; GFX1010-NEXT: [[DOTQUERY_SIZE:%.*]] = call <2 x i32> (...) @lgc.create.image.query.size{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 0) +; GFX1010-NEXT: [[DOTQUERY_LEVELS:%.*]] = call i32 (...) @lgc.create.image.query.levels{{.*}}(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]]) +; GFX1010-NEXT: [[LANE:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX1010-NEXT: [[OFS:%.*]] = mul i32 [[LANE]], 32 +; GFX1010-NEXT: [[DOTDESC2_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc{{.*}}(i32 1, i32 1, i32 3, i32 4) +; GFX1010-NEXT: [[DOTDESC2_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR2]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTDESC2_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC2_PTR1]], i32 [[OFS]] +; GFX1010-NEXT: [[DOTDESC2_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR0]] to ptr addrspace(4) +; GFX1010-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, ptr addrspace(4) [[DOTDESC2_PTR]], i32 0) +; GFX1010-NEXT: ret void +; +;. +; CHECK: [[META0]] = !{i32 1} +;. 
diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/no_generalize_calls.lgc.expected b/llpc/test/tools/UpdateTestChecks/Inputs/no_generalize_calls.lgc.expected new file mode 100644 index 0000000000..98db2c440c --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/no_generalize_calls.lgc.expected @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc --version 5 +; Test that invalid image descriptor patching is applied where required. + +; RUN: lgc -mcpu=gfx1010 -passes=lgc-apply-workarounds -o - - <%s 2>&1 | FileCheck --check-prefixes=CHECK,GFX1010 %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +; ModuleID = 'lgcPipeline' +source_filename = "lgcPipeline" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" +target triple = "amdgcn--amdpal" + +; Function Attrs: nounwind +define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc.shaderstage !0 { +.entry: + %.desc.ptr2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 3) + %.desc.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc.ptr2 to i8 addrspace(4)* + %.desc.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc.ptr1, i64 0 + %.desc.ptr = bitcast i8 addrspace(4)* %.desc.ptr0 to <8 x i32> addrspace(4)* + %.sampler.ptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) + + %.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 1) + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <2 x i32> zeroinitializer) + + %.sample = call <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 1, <2 x float> zeroinitializer) + %.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) + + %.atomic = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> addrspace(4)* %.desc.ptr, i32 0, i32 1) #0 + + %.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, <2 x float> zeroinitializer) + + %.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 0) + %.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr) + + %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) ; just some source of divergence + %ofs = mul i32 %lane, 32 + + ; Use a waterfall loop with last.use to test that is also handled correctly + %.desc2.ptr2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 4) + %.desc2.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc2.ptr2 to i8 addrspace(4)* + %.desc2.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc2.ptr1, i32 %ofs + %.desc2.ptr = bitcast i8 addrspace(4)* %.desc2.ptr0 to <8 x i32> addrspace(4)* + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> addrspace(4)* %.desc2.ptr, i32 zeroinitializer) + + ret void +} + +declare <8 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v8i32(...) #1 +declare <4 x i32> addrspace(4)* @lgc.create.get.desc.ptr.p4v4i32(...) #1 +declare <4 x float> @lgc.create.image.load.v4f32(...) #1 +declare void @lgc.create.image.store(...) #2 +declare <4 x float> @lgc.create.image.sample.v4f32(...) 
#1 +declare <4 x float> @lgc.create.image.gather.v4f32(...) #1 +declare i32 @lgc.create.image.atomic.i32(...) #0 +declare <2 x float> @lgc.create.image.get.lod.v2f32(...) #0 +declare <2 x i32> @lgc.create.image.query.size.v2i32(...) #0 +declare i32 @lgc.create.image.query.levels.i32(...) #0 + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind writeonly } + +!0 = !{i32 1} + +!lgc.unlinked = !{!0} +!lgc.user.data.nodes = !{!1,!2,!3,!4} + +!1 = !{!"DescriptorTableVaPtr", i32 0, i32 0, i32 0, i32 1, i32 3} +!2 = !{!"DescriptorResource", i32 1, i32 0, i32 0, i32 8, i32 3, i32 3, i32 8} +!3 = !{!"DescriptorResource", i32 1, i32 0, i32 8, i32 8, i32 3, i32 4, i32 8} +!4 = !{!"DescriptorSampler", i32 2, i32 0, i32 16, i32 4, i32 0, i32 13, i32 4} +; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META0:![0-9]+]] { +; CHECK-NEXT: [[_ENTRY:.*:]] +; CHECK-NEXT: [[DOTDESC_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 3) +; CHECK-NEXT: [[DOTDESC_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR2]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTDESC_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC_PTR1]], i64 0 +; CHECK-NEXT: [[DOTDESC_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR0]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTSAMPLER_PTR:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) +; CHECK-NEXT: [[DOTLOAD:%.*]] = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 1) +; CHECK-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], <2 x i32> zeroinitializer) +; CHECK-NEXT: [[DOTSAMPLE:%.*]] = call <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 1, <2 x float> zeroinitializer) +; CHECK-NEXT: [[DOTGATHER:%.*]] = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) +; CHECK-NEXT: [[DOTATOMIC:%.*]] = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, ptr addrspace(4) [[DOTDESC_PTR]], i32 0, i32 1) #[[ATTR0]] +; CHECK-NEXT: [[DOTLOD:%.*]] = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], <2 x float> zeroinitializer) +; CHECK-NEXT: [[DOTQUERY_SIZE:%.*]] = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 0) +; CHECK-NEXT: [[DOTQUERY_LEVELS:%.*]] = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]]) +; CHECK-NEXT: [[LANE:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; CHECK-NEXT: [[OFS:%.*]] = mul i32 [[LANE]], 32 +; CHECK-NEXT: [[DOTDESC2_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 4) +; CHECK-NEXT: [[DOTDESC2_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR2]] to ptr addrspace(4) +; CHECK-NEXT: [[DOTDESC2_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC2_PTR1]], i32 [[OFS]] +; CHECK-NEXT: [[DOTDESC2_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR0]] to ptr addrspace(4) +; CHECK-NEXT: call void (...) 
@lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, ptr addrspace(4) [[DOTDESC2_PTR]], i32 0) +; CHECK-NEXT: ret void +; +; +; GFX1010-LABEL: define dllexport spir_func void @lgc.shader.VS.main( +; GFX1010-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !lgc.shaderstage [[META0:![0-9]+]] { +; GFX1010-NEXT: [[_ENTRY:.*:]] +; GFX1010-NEXT: [[DOTDESC_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 3) +; GFX1010-NEXT: [[DOTDESC_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR2]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTDESC_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC_PTR1]], i64 0 +; GFX1010-NEXT: [[DOTDESC_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC_PTR0]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTSAMPLER_PTR:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) +; GFX1010-NEXT: [[DOTLOAD:%.*]] = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 1) +; GFX1010-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], <2 x i32> zeroinitializer) +; GFX1010-NEXT: [[DOTSAMPLE:%.*]] = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 1, <2 x float> zeroinitializer) +; GFX1010-NEXT: [[DOTGATHER:%.*]] = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) +; GFX1010-NEXT: [[DOTATOMIC:%.*]] = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, ptr addrspace(4) [[DOTDESC_PTR]], i32 0, i32 1) #[[ATTR0]] +; GFX1010-NEXT: [[DOTLOD:%.*]] = call <2 x float> (...) 
@lgc.create.image.get.lod.v2f32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], ptr addrspace(4) [[DOTSAMPLER_PTR]], <2 x float> zeroinitializer) +; GFX1010-NEXT: [[DOTQUERY_SIZE:%.*]] = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]], i32 0) +; GFX1010-NEXT: [[DOTQUERY_LEVELS:%.*]] = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, ptr addrspace(4) [[DOTDESC_PTR]]) +; GFX1010-NEXT: [[LANE:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) +; GFX1010-NEXT: [[OFS:%.*]] = mul i32 [[LANE]], 32 +; GFX1010-NEXT: [[DOTDESC2_PTR2:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 3, i32 4) +; GFX1010-NEXT: [[DOTDESC2_PTR1:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR2]] to ptr addrspace(4) +; GFX1010-NEXT: [[DOTDESC2_PTR0:%.*]] = getelementptr i8, ptr addrspace(4) [[DOTDESC2_PTR1]], i32 [[OFS]] +; GFX1010-NEXT: [[DOTDESC2_PTR:%.*]] = bitcast ptr addrspace(4) [[DOTDESC2_PTR0]] to ptr addrspace(4) +; GFX1010-NEXT: call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, ptr addrspace(4) [[DOTDESC2_PTR]], i32 0) +; GFX1010-NEXT: ret void +; +;. +; CHECK: [[META0]] = !{i32 1} +;. diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe new file mode 100644 index 0000000000..3200056da7 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), 
!invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP12]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 13, i32 4, float poison, float poison, float [[TMP13]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected new file mode 100644 index 0000000000..02b794b71a --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. + +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), 
!invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP14]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP9]], float [[TMP10]], float [[TMP11]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP12]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected.reset b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected.reset new file mode 100644 index 0000000000..187d08f542 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/Inputs/stable_ir_values.pipe.expected.reset @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --generalize-calls +; Test that the layer input is not output when multiview is enabled. +; For GFX10+, dummy generic output is no longer needed +; If multiview is enabled, it will fail to build with relocatable ELF. 
+ +; RUN: amdllpc -enable-relocatable-shader-elf -emit-llvm -o - -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +;; Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ;; + + +[Version] +version = 52 + +[VsGlsl] +#version 450 + +layout(location = 0) in vec4 _17; + +void main() +{ + gl_Position = _17; +} + +[VsInfo] +entryPoint = main + +[FsGlsl] +#version 450 + +layout(location = 0) out vec4 _9; + +void main() +{ + _9 = vec4(0,0,0,0); +} + +[FsInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 1 +userDataNode[0].type = IndirectUserDataVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].indirectUserDataCount = 8 + +[GraphicsPipelineState] +topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP +enableMultiView = 1 +colorBuffer[0].format = VK_FORMAT_R8G8B8A8_UNORM +colorBuffer[0].channelWriteMask = 15 +colorBuffer[0].blendEnable = 0 +colorBuffer[0].blendSrcAlphaToColor = 1 + + +[VertexInputState] +binding[0].binding = 0 +binding[0].stride = 16 +binding[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX +attribute[0].location = 0 +attribute[0].binding = 0 +attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT +attribute[0].offset = 0 +; SHADERTEST-LABEL: @_amdgpu_vs_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[VERTEXID:%.*]], [[BASEVERTEX:%.*]] +; SHADERTEST-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 +; SHADERTEST-NEXT: [[TMP2:%.*]] = zext i32 [[VERTEXBUFFERTABLE:%.*]] to i64 +; SHADERTEST-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16, !invariant.load [[META7:![0-9]+]] +; SHADERTEST-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 0, i32 0, i32 
22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 4, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 8, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.tbuffer.load{{.*}}(<4 x i32> [[TMP5]], i32 [[VERTEXINDEX]], i32 12, i32 0, i32 22, i32 0), !invariant.load [[META7]] +; SHADERTEST-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP6]] to float +; SHADERTEST-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP7]] to float +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP8]] to float +; SHADERTEST-NEXT: [[TMP13:%.*]] = bitcast i32 [[TMP9]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 12, i32 15, float [[TMP10]], float [[TMP11]], float [[TMP12]], float [[TMP13]], i1 false, i1 false) +; SHADERTEST-NEXT: [[TMP14:%.*]] = bitcast i32 [[VIEWID:%.*]] to float +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp{{.*}}(i32 13, i32 4, float poison, float poison, float [[TMP14]], float poison, i1 true, i1 false) +; SHADERTEST-NEXT: ret void +; +; +; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr{{.*}}(i32 0, i32 15, <2 x half> zeroinitializer, <2 x half> zeroinitializer, i1 true, i1 true) +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/tools/UpdateTestChecks/check_attributes.test b/llpc/test/tools/UpdateTestChecks/check_attributes.test new file mode 100644 index 0000000000..6137f4f200 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/check_attributes.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --check-attributes --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/check_attributes.pipe.expected diff --git 
a/llpc/test/tools/UpdateTestChecks/check_function.test b/llpc/test/tools/UpdateTestChecks/check_function.test new file mode 100644 index 0000000000..6fa1ad9841 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/check_function.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --function _amdgpu_ps_main +# RUN: diff -u %t.pipe %S/Inputs/function.pipe.expected diff --git a/llpc/test/tools/UpdateTestChecks/check_function_signature.test b/llpc/test/tools/UpdateTestChecks/check_function_signature.test new file mode 100644 index 0000000000..b1e28fb79f --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/check_function_signature.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --function-signature --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/function_signature.pipe.expected diff --git a/llpc/test/tools/UpdateTestChecks/check_globals.test b/llpc/test/tools/UpdateTestChecks/check_globals.test new file mode 100644 index 0000000000..5d9f2892f5 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/check_globals.test @@ -0,0 +1,6 @@ +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --check-globals --version 1 --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/check_globals.pipe.expected +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --check-globals all --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/check_globals_all.pipe.expected +# RUN: cp -f %S/Inputs/base_test.pipe %t.pipe && %update_llpc_test_checks %t.pipe --check-globals smart --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/check_globals_smart.pipe.expected diff --git a/llpc/test/tools/UpdateTestChecks/check_pal_metadata.test b/llpc/test/tools/UpdateTestChecks/check_pal_metadata.test new file mode 100644 index 0000000000..aa2405e000 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/check_pal_metadata.test @@ -0,0 +1,3 @@ +# 
REQUIRES: do-not-run-me +# RUN: cp -f %S/Inputs/check_pal_metadata.pipe %t.pipe && %update_llpc_test_checks %t.pipe --check-pal-metadata +# RUN: diff -u %t.pipe %S/Inputs/check_pal_metadata.pipe.expected diff --git a/llpc/test/tools/UpdateTestChecks/lit.local.cfg b/llpc/test/tools/UpdateTestChecks/lit.local.cfg new file mode 100644 index 0000000000..6286e5f890 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/lit.local.cfg @@ -0,0 +1,64 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + +import os + +import lit.formats +import lit.util + +from shlex import quote as shell_quote + +# llpc_src_root +config.llpc_src_root = os.path.abspath(os.path.join(config.test_source_root, '..', '..')) + +# llpc_tool_root for the update_llpc_test_checks.py +config.llpc_tool_root = os.path.join(config.llpc_src_root, 'tool') + +# We only test this one test script for now +script_path = os.path.join(config.llpc_tool_root, "update_llpc_test_checks.py") +assert os.path.isfile(script_path) + +def add_update_script_substitution( + name, python_exe=config.python_executable, extra_args="" +): + assert name.startswith("%") + # Specify an explicit default version in UTC tests, so that the --version + # embedded in UTC_ARGS does not change in all test expectations every time + # the default is bumped. + # if name != "%update_test_body": + # extra_args += " --version=1" + config.substitutions.append( + (name, "'%s' %s %s" % (python_exe, script_path, extra_args)) + ) + +# we support amdllpc and lgc for the tests, but lgc should use LLVM's update_test_checks.py +amdllpc_args = "--tool-binary " + shell_quote(os.path.join(config.amdllpc_dir, 'amdllpc')) +lgc_args = "--tool-binary " + shell_quote(os.path.join(config.llvm_tools_dir, 'lgc')) + " --tool lgc" + +add_update_script_substitution("%update_llpc_test_checks", extra_args=amdllpc_args) +add_update_script_substitution("%update_lgc_test_checks", extra_args=lgc_args) + +config.test_format = lit.formats.ShTest(execute_external=False) +config.suffixes = [".test"] diff --git a/llpc/test/tools/UpdateTestChecks/stable_ir_values.test b/llpc/test/tools/UpdateTestChecks/stable_ir_values.test new file mode 100644 index 0000000000..e8e154ffff --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/stable_ir_values.test @@ -0,0 +1,5 @@ +# RUN: cp -f %S/Inputs/stable_ir_values.pipe %t.pipe && %update_llpc_test_checks 
--generalize-calls %t.pipe +# RUN: diff -u %t.pipe %S/Inputs/stable_ir_values.pipe.expected +# Check that we get the same result after running twice +# RUN: %update_llpc_test_checks %t.pipe +# RUN: diff -u %t.pipe %S/Inputs/stable_ir_values.pipe.expected diff --git a/llpc/test/tools/UpdateTestChecks/stable_ir_values_reset.test b/llpc/test/tools/UpdateTestChecks/stable_ir_values_reset.test new file mode 100644 index 0000000000..0c0d0caba1 --- /dev/null +++ b/llpc/test/tools/UpdateTestChecks/stable_ir_values_reset.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values.pipe %t.pipe && %update_llpc_test_checks %t.pipe --reset-variable-names --generalize-calls +# RUN: diff -u %t.pipe %S/Inputs/stable_ir_values.pipe.expected.reset diff --git a/llpc/tool/amdllpc.cpp b/llpc/tool/amdllpc.cpp index c9f113606d..86dfcb7da4 100644 --- a/llpc/tool/amdllpc.cpp +++ b/llpc/tool/amdllpc.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2016-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2016-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -350,6 +350,12 @@ LlpcRaytracingModeSetting("llpc-raytracing-mode", cl::init(LlpcRaytracingMode::L cl::opt EnableColorExportShader("enable-color-export-shader", cl::desc("Enable color export shader, only compile each stage of the pipeline without linking"), cl::init(false)); + +// -pad-buffer-size-to-next-dword +cl::opt PadBufferSizeToNextDWORD("pad-buffer-size-to-next-dword", + cl::desc("Pad buffer size to next DWORD"), + cl::init(true)); + } // namespace // clang-format on namespace llvm { @@ -580,6 +586,10 @@ static void initCompileInfo(CompileInfo *compileInfo) { compileInfo->rayTracePipelineInfo.options.forceNonUniformResourceIndexStageMask = ForceNonUniformResourceIndexStageMask; + compileInfo->compPipelineInfo.options.padBufferSizeToNextDword = PadBufferSizeToNextDWORD; + compileInfo->gfxPipelineInfo.options.padBufferSizeToNextDword = PadBufferSizeToNextDWORD; + compileInfo->rayTracePipelineInfo.options.padBufferSizeToNextDword = PadBufferSizeToNextDWORD; + // Set NGG control settings auto &nggState = compileInfo->gfxPipelineInfo.nggState; diff --git a/llpc/tool/llpcAutoLayout.cpp b/llpc/tool/llpcAutoLayout.cpp index 69f238e95f..27590f9470 100644 --- a/llpc/tool/llpcAutoLayout.cpp +++ b/llpc/tool/llpcAutoLayout.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -422,7 +422,13 @@ void doAutoLayoutDesc(ShaderStage shaderStage, BinaryData spirvBin, GraphicsPipe if (!var->hasDecorate(DecorationLocation, 0, &location)) continue; + unsigned outCount = 1; SPIRVType *varElemTy = var->getType()->getPointerElementType(); + if (varElemTy->getOpCode() == OpTypeArray) { + outCount = varElemTy->getArrayLength(); + varElemTy = varElemTy->getArrayElementType(); + } + unsigned elemCount = 1; if (varElemTy->getOpCode() == OpTypeVector) { elemCount = varElemTy->getVectorComponentCount(); @@ -540,6 +546,12 @@ void doAutoLayoutDesc(ShaderStage shaderStage, BinaryData spirvBin, GraphicsPipe auto colorTarget = &pipelineInfo->cbState.target[location]; colorTarget->format = format; colorTarget->channelWriteMask = (1U << elemCount) - 1; + + for (unsigned idx = 1; idx < outCount; idx++) { + auto colorTarget = &pipelineInfo->cbState.target[location + idx]; + colorTarget->format = format; + colorTarget->channelWriteMask = (1U << elemCount) - 1; + } } } diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index 88aa9775cb..f4b653f84a 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -52,9 +52,12 @@ #include "llpcRayTracingContext.h" #include "compilerutils/TypesMetadata.h" #include "llvmraytracing/ContinuationsUtil.h" +#include "xdl/util/ElementType.h" #include "lgc/LgcDialect.h" #include "lgc/LgcRtDialect.h" #include "lgc/LgcRtqDialect.h" +#include "lgc/LgcXdlDialect.h" +#include "lgc/LgcXdlTypes.h" #include "lgc/Pipeline.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -101,6 +104,7 @@ using namespace SPIRV; using namespace Llpc; using namespace lgc::rt; using namespace lgc::rtq; +using namespace lgc::xdl; namespace Llpc { unsigned getTraceRayParamPayloadIdx(void); @@ 
-267,6 +271,27 @@ SPIRVWord getStd430AlignedTypeSize(SPIRVType *const spvType) { return 0; } +static Value *vectorCompositeConstruct(Type *vecTy, const std::vector &constituents, lgc::Builder *builder) { + Value *v = PoisonValue::get(vecTy); + for (unsigned idx = 0, i = 0, e = constituents.size(); i < e; ++i) { + if (constituents[i]->getType()->isVectorTy()) { + // NOTE: It is allowed to construct a vector from several "smaller" + // scalars or vectors, such as vec4 = (vec2, vec2) or vec4 = (float, + // vec3). + auto compCount = cast(constituents[i]->getType())->getNumElements(); + for (unsigned j = 0; j < compCount; ++j) { + auto *comp = builder->CreateExtractElement(constituents[i], builder->getInt32(j)); + v = builder->CreateInsertElement(v, comp, builder->getInt32(idx)); + ++idx; + } + } else { + v = builder->CreateInsertElement(v, constituents[i], builder->getInt32(idx)); + ++idx; + } + } + return v; +} + bool SPIRVToLLVM::isStorageClassExplicitlyLaidOut(SPIRVStorageClassKind storageClass) { return llvm::is_contained({StorageClassStorageBuffer, StorageClassUniform, StorageClassPushConstant, StorageClassPhysicalStorageBufferEXT}, @@ -377,6 +402,10 @@ ImageTypeIndices SPIRVToLLVM::getImageTypeIndices(unsigned imageComponents) cons result.convertingSamplerIdx = idx++; } + if (getPipelineOptions()->getGlState().enableDepthCompareParam) { + result.compareParamPointer = idx++; + } + return result; } @@ -404,6 +433,10 @@ Type *SPIRVToLLVM::getImageTy(unsigned imageComponents) const { types.push_back(i32); // convertingSamplerIdx } + if (getPipelineOptions()->getGlState().enableDepthCompareParam) { + types.push_back(getBuilder()->getBufferDescTy()); // pointer + } + return StructType::get(*m_context, types); } @@ -976,7 +1009,7 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *co unsigned columns = spvType->getCooperativeMatrixKHRColumns(); auto matrixLayout = getCooperativeMatrixKHRLayout(static_cast(use), elemType, rows, columns); const unsigned kSize = rows > 
columns ? rows : columns; - return getBuilder()->getCooperativeMatrixTy(elemType, matrixLayout, kSize); + return lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, matrixLayout, kSize); } // ===================================================================================================================== @@ -1308,10 +1341,10 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb auto srcType = src->getType(); CastInst::CastOps co = Instruction::BitCast; - lgc::CooperativeMatrixElementType srcElemTy = lgc::CooperativeMatrixElementType::Unknown; - lgc::CooperativeMatrixElementType dstElemTy = lgc::CooperativeMatrixElementType::Unknown; - lgc::CooperativeMatrixLayout srcLayout = lgc::CooperativeMatrixLayout::InvalidLayout; - lgc::CooperativeMatrixLayout dstLayout = lgc::CooperativeMatrixLayout::InvalidLayout; + auto srcElemTy = lgc::xdl::CooperativeMatrixElementType::Unknown; + auto dstElemTy = lgc::xdl::CooperativeMatrixElementType::Unknown; + auto srcLayout = lgc::xdl::CooperativeMatrixLayout::InvalidLayout; + auto dstLayout = lgc::xdl::CooperativeMatrixLayout::InvalidLayout; bool isExt = dstType->getScalarSizeInBits() > srcType->getScalarSizeInBits(); if (bv->getType()->isTypeCooperativeMatrixKHR()) { @@ -1319,11 +1352,12 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb srcElemTy = mapToBasicType(srcCompType); auto dstCompType = static_cast(dstSpvType)->getCooperativeMatrixKHRComponentType(); dstElemTy = mapToBasicType(dstCompType); + auto srcUse = static_cast(srcSpvType)->getCooperativeMatrixKHRUse(); auto dstUse = static_cast(dstSpvType)->getCooperativeMatrixKHRUse(); unsigned rows = static_cast(dstSpvType)->getCooperativeMatrixKHRRows(); unsigned columns = static_cast(dstSpvType)->getCooperativeMatrixKHRColumns(); dstLayout = getCooperativeMatrixKHRLayout(static_cast(dstUse), dstElemTy, rows, columns); - srcLayout = getCooperativeMatrixKHRLayout(static_cast(dstUse), srcElemTy, rows, 
columns); + srcLayout = getCooperativeMatrixKHRLayout(static_cast(srcUse), srcElemTy, rows, columns); } switch (bc->getOpCode()) { @@ -1350,9 +1384,9 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb unsigned rows = static_cast(dstSpvType)->getCooperativeMatrixKHRRows(); unsigned columns = static_cast(dstSpvType)->getCooperativeMatrixKHRColumns(); const unsigned kSize = rows > columns ? rows : columns; - Type *matrixType = getBuilder()->getCooperativeMatrixTy(dstElemTy, dstLayout, kSize); - return getBuilder()->create(matrixType, co, src, srcElemTy, dstElemTy, srcLayout, - dstLayout, "convert"); + Type *matrixType = lgc::xdl::getCooperativeMatrixTy(*getBuilder(), dstElemTy, dstLayout, kSize); + return getBuilder()->create(matrixType, co, src, srcElemTy, dstElemTy, + srcLayout, dstLayout, "convert"); } if (co == Instruction::FPTrunc) { @@ -2116,7 +2150,9 @@ Value *SPIRVToLLVM::addLoadInstRecursively(SPIRVType *const spvType, Value *load return load; } - if (loadType->isArrayTy() && !spvType->isTypeVector() && !spvType->isTypeImage()) { + const bool isVectorTy = spvType->isTypeVector(); + SPIRVType *(SPIRVType::*getVectorElemType)() const = &SPIRVType::getVectorComponentType; + if (loadType->isArrayTy() && !isVectorTy && !spvType->isTypeImage()) { // Rewrite this condition to keep consistent with the assert on getArrayElementType/getMatrixColumnType later // Matrix and arrays both get here. For both we need to turn [<{element-type, pad}>] into [element-type]. const bool needsPad = isTypeWithPad(loadType); @@ -2146,12 +2182,13 @@ Value *SPIRVToLLVM::addLoadInstRecursively(SPIRVType *const spvType, Value *load return load; } - if (spvType->isTypeVector() && isCoherent) { + if (isVectorTy && isCoherent) { // Coherent load operand must be integer, pointer, or floating point type, so need to spilte vector. 
- SPIRVType *spvElementType = spvType->getVectorComponentType(); + SPIRVType *spvElementType = (spvType->*getVectorElemType)(); + unsigned elementCount = spvType->getVectorComponentCount(); Type *elementType = transType(spvElementType); - Value *load = PoisonValue::get(VectorType::get(elementType, spvType->getVectorComponentCount(), false)); - for (unsigned i = 0, elementCount = spvType->getVectorComponentCount(); i < elementCount; i++) { + Value *load = PoisonValue::get(VectorType::get(elementType, elementCount, false)); + for (unsigned i = 0; i < elementCount; i++) { Value *const elementLoadPointer = getBuilder()->CreateInBoundsGEP(loadType, loadPointer, {zero, getBuilder()->getInt32(i)}); Value *const elementLoad = addLoadInstRecursively(spvElementType, elementLoadPointer, elementType, isVolatile, @@ -2164,7 +2201,7 @@ Value *SPIRVToLLVM::addLoadInstRecursively(SPIRVType *const spvType, Value *load Type *alignmentType = loadType; // Vectors are represented as arrays in memory, so we need to cast the array to a vector before loading. 
- if (spvType->isTypeVector()) { + if (isVectorTy) { loadType = transType(spvType, 0, false, LayoutMode::Native); const bool scalarBlockLayout = getPipelineOptions()->scalarBlockLayout; @@ -2229,6 +2266,8 @@ void SPIRVToLLVM::addStoreInstRecursively(SPIRVType *const spvType, Value *store } const bool useSGep = storePointer->getType()->getPointerAddressSpace() == SPIRAS_Output; + const bool isVectorTy = spvType->isTypeVector(); + SPIRVType *(SPIRVType::*getVectorElemType)() const = &SPIRVType::getVectorComponentType; Value *const zero = getBuilder()->getInt32(0); if (storeType->isStructTy() && !spvType->isTypeSampledImage() && !spvType->isTypeImage() && !spvType->isTypeSampler() && spvType->getOpCode() != OpTypeRayQueryKHR) { @@ -2246,7 +2285,7 @@ void SPIRVToLLVM::addStoreInstRecursively(SPIRVType *const spvType, Value *store addStoreInstRecursively(spvType->getStructMemberType(i), memberStorePointer, memberStoreType, memberStoreValue, isVolatile, isCoherent, isNonTemporal); } - } else if (storeType->isArrayTy() && !spvType->isTypeVector() && !spvType->isTypeImage()) { + } else if (storeType->isArrayTy() && !isVectorTy && !spvType->isTypeImage()) { // Matrix and arrays both get here. For both we need to turn [element-type] into [<{element-type, pad}>]. const bool needsPad = isTypeWithPad(storeType); @@ -2267,11 +2306,11 @@ void SPIRVToLLVM::addStoreInstRecursively(SPIRVType *const spvType, Value *store addStoreInstRecursively(spvElementType, elementStorePointer, elementStoreType, elementStoreValue, isVolatile, isCoherent, isNonTemporal); } - } else if (spvType->isTypeVector() && isCoherent) { + } else if (isVectorTy && isCoherent) { // Coherent store operand must be integer, pointer, or floating point type, so need to spilte vector. 
- SPIRVType *spvElementType = spvType->getVectorComponentType(); - - for (unsigned i = 0, elementCount = spvType->getVectorComponentCount(); i < elementCount; i++) { + SPIRVType *spvElementType = (spvType->*getVectorElemType)(); + unsigned elementCount = spvType->getVectorComponentCount(); + for (unsigned i = 0; i < elementCount; i++) { Value *indices[] = {zero, getBuilder()->getInt32(i)}; Value *const elementStorePointer = getBuilder()->CreateInBoundsGEP(storeType, storePointer, indices); Type *const elementStoreType = GetElementPtrInst::getIndexedType(storeType, indices); @@ -2292,7 +2331,7 @@ void SPIRVToLLVM::addStoreInstRecursively(SPIRVType *const spvType, Value *store } // Vectors are represented as arrays in memory, so we need to cast the array to a vector before storing. - if (spvType->isTypeVector()) { + if (isVectorTy) { const bool scalarBlockLayout = getPipelineOptions()->scalarBlockLayout; if (!scalarBlockLayout) alignmentType = storeType; @@ -2813,61 +2852,11 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SP template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { SPIRVCopyMemory *const spvCopyMemory = static_cast(spvValue); - bool isSrcVolatile = spvCopyMemory->SPIRVMemoryAccess::isVolatile(true); - - // We don't require volatile on address spaces that become non-pointers. - switch (spvCopyMemory->getSource()->getType()->getPointerStorageClass()) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isSrcVolatile = false; - break; - default: - break; - } - - bool isDestVolatile = spvCopyMemory->SPIRVMemoryAccess::isVolatile(false); + bool isSrcVolatile = checkVolatile(spvCopyMemory->getSource(), spvCopyMemory, true); + bool isDestVolatile = checkVolatile(spvCopyMemory->getTarget(), spvCopyMemory, false); - // We don't require volatile on address spaces that become non-pointers. 
- switch (spvCopyMemory->getTarget()->getType()->getPointerStorageClass()) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isDestVolatile = false; - break; - default: - break; - } - - bool isCoherent = false; - - if (spvCopyMemory->getMemoryAccessMask(true) & MemoryAccessMakePointerVisibleKHRMask) { - SPIRVWord spvId = spvCopyMemory->getMakeVisibleScope(true); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - - if (isSystemScope) - isCoherent = true; - } - if (spvCopyMemory->getMemoryAccessMask(true) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; - - if (spvCopyMemory->getMemoryAccessMask(false) & MemoryAccessMakePointerAvailableKHRMask) { - SPIRVWord spvId = spvCopyMemory->getMakeAvailableScope(false); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - - if (isSystemScope) - isCoherent = true; - } - if (spvCopyMemory->getMemoryAccessMask(false) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; + bool isCoherent = checkCoherent(spvCopyMemory->getSource(), spvCopyMemory, true) || + checkCoherent(spvCopyMemory->getTarget(), spvCopyMemory, false); Value *const loadPointer = transValue(spvCopyMemory->getSource(), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2947,13 +2936,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s layout = LayoutMode::Scalar; } - bool isVolatile = spvLoad->SPIRVMemoryAccess::isVolatile(true); - const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; - if (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) - 
isVolatile |= spvLoad->getSrc()->isVolatile(); - // Translate a volatile load of BuiltInHelperInvocation to a call to IsHelperInvocation. - if ((isVolatile || spvLoad->getSrc()->isVolatile()) && + if ((spvLoad->SPIRVMemoryAccess::isVolatile(true) || spvLoad->getSrc()->isVolatile()) && spvLoad->getSrc()->getType()->getPointerStorageClass() == StorageClassInput) { if (GlobalVariable *gv = dyn_cast(loadPointer)) { SPIRVBuiltinVariableKind kind; @@ -2964,34 +2948,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s } } - // We don't require volatile on address spaces that become non-pointers. - switch (spvLoad->getSrc()->getType()->getPointerStorageClass()) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isVolatile = false; - break; - default: - break; - } - - bool isCoherent = spvLoad->getSrc()->isCoherent(); - - // MakePointerVisibleKHR is valid with OpLoad - if (spvLoad->getMemoryAccessMask(true) & MemoryAccessMakePointerVisibleKHRMask) { - SPIRVWord spvId = spvLoad->getMakeVisibleScope(true); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - - if (isSystemScope) - isCoherent = true; - } - - if (spvLoad->getMemoryAccessMask(true) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; + bool isVolatile = checkVolatile(spvLoad->getSrc(), spvLoad, true); + auto isCoherent = checkCoherent(spvLoad->getSrc(), spvLoad, true); if (spvLoad->getSrc()->getType()->getPointerStorageClass() == StorageClassTaskPayloadWorkgroupEXT) isCoherent = true; @@ -3130,6 +3088,7 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy unsigned imageDescSet = descriptorSet; unsigned fmaskDescSet = descriptorSet; unsigned samplerDescSet = descriptorSet; + unsigned compareParamDescSet = descriptorSet; if 
(getPipelineOptions()->getGlState().replaceSetWithResourceType) { assert(spvTy->getOpCode() != OpTypeSampler); @@ -3153,6 +3112,9 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy if (!getPipelineOptions()->getGlState().enableCombinedTexture) samplerDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler); + + if (getPipelineOptions()->getGlState().enableDepthCompareParam) + compareParamDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::InlineBuffer); } unsigned convertingSamplerIdx = 0; @@ -3215,6 +3177,13 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy idxs.convertingSamplerIdx); } + if (getPipelineOptions()->getGlState().enableDepthCompareParam) { + Value *compareParamPointer = getBuilder()->create( + compareParamDescSet, binding, m_builder->getInt32(0), lgc::Builder::BufferFlagConst); + getBuilder()->CreateInvariantStart(compareParamPointer); + result = getBuilder()->CreateInsertValue(result, compareParamPointer, idxs.compareParamPointer); + } + return result; } @@ -3233,41 +3202,10 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const return nullptr; } - bool isVolatile = spvStore->SPIRVMemoryAccess::isVolatile(false); - const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; - if (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) - isVolatile |= spvStore->getDst()->isVolatile(); + bool isVolatile = checkVolatile(spvStore->getDst(), spvStore, false); + bool isCoherent = checkCoherent(spvStore->getDst(), spvStore, false); - // We don't require volatile on address spaces that become non-pointers. 
const auto pointerStorageClass = spvStore->getDst()->getType()->getPointerStorageClass(); - switch (pointerStorageClass) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isVolatile = false; - break; - default: - break; - } - - bool isCoherent = spvStore->getDst()->isCoherent(); - - // MakePointerAvailableKHR is valid with OpStore - if (spvStore->getMemoryAccessMask(false) & MemoryAccessMakePointerAvailableKHRMask) { - SPIRVWord spvId = spvStore->getMakeAvailableScope(false); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - - if (isSystemScope) - isCoherent = true; - } - - if (spvStore->getMemoryAccessMask(false) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; - if (pointerStorageClass == StorageClassTaskPayloadWorkgroupEXT) isCoherent = true; @@ -3602,8 +3540,10 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { break; } case OpTypeVector: { - gepIndices.push_back(index); + spvAccessElementType = spvAccessElementType->getVectorComponentType(); + gepIndices.push_back(index); + break; } case OpTypeCooperativeMatrixKHR: { @@ -3613,8 +3553,8 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { unsigned columns = spvAccessElementType->getCooperativeMatrixKHRColumns(); spvAccessElementType = spvAccessElementType->getCooperativeMatrixKHRComponentType(); basePointeeType = transType(spvAccessElementType); - lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessElementType); - lgc::CooperativeMatrixLayout layout = + lgc::xdl::CooperativeMatrixElementType elemType = mapToBasicType(spvAccessElementType); + lgc::xdl::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(static_cast(use), elemType, rows, columns); std::string 
mangledName(LlpcName::SpirvCooperativeMatrixProxy); @@ -4493,7 +4433,7 @@ Value *SPIRVToLLVM::transGroupArithOp(Builder::GroupArithOp groupArithOp, SPIRVV Function *const func = getBuilder()->GetInsertBlock()->getParent(); Value *const value = transValue(spvOperands[2], func, block); Value *const clusterSize = - spvOperands.size() > 3 ? transValue(spvOperands[3], func, block) : getBuilder()->CreateGetWaveSize(); + spvOperands.size() > 3 ? transValue(spvOperands[3], func, block) : getBuilder()->getInt32(0); switch (static_cast(spvOperands[1])->getZExtIntValue()) { case GroupOperationReduce: @@ -5165,14 +5105,11 @@ Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { break; } case StorageClassUniformConstant: { - if (spvVarType->isTypeAccelerationStructureKHR()) { - readOnly = true; - } - - if (spvVarType->isTypeArray() && spvVarType->getArrayElementType()->isTypeAccelerationStructureKHR()) { + // We can only mark the variable as readonly if there is no initializer, otherwise the initial value will be + // constant-propagated during compilation. In case of default uniform constants, its value may be changed before + // shader run. So it is not compile-time constant, but constant during shader execution. 
+ if (!initializer) readOnly = true; - } - break; } @@ -5324,11 +5261,12 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVV SPIRVType *elemSpvType = spvOperands[0]->getType()->getCooperativeMatrixKHRComponentType(); unsigned rows = spvOperands[0]->getType()->getCooperativeMatrixKHRRows(); unsigned columns = spvOperands[0]->getType()->getCooperativeMatrixKHRColumns(); - lgc::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); - lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( + lgc::xdl::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); + lgc::xdl::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( static_cast(spvOperands[0]->getType()->getCooperativeMatrixKHRUse()), elemType, rows, columns); - return getBuilder()->create(matrix->getType(), matrix, scalar, elemType, layout); + return getBuilder()->create(matrix->getType(), matrix, scalar, elemType, + layout); } else { return getBuilder()->CreateMatrixTimesScalar(matrix, scalar); } @@ -5434,18 +5372,18 @@ Value *SPIRVToLLVM::transString(const SPIRVString *spvValue) { // | iu4 | i32 | Y | Y | // For integer types, arbitrary signedness combinations are supported for the // A/B matrices.C/D matrices are always signed. 
-lgc::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(SPIRVType *const elemType) { - lgc::CooperativeMatrixElementType basicTy = lgc::CooperativeMatrixElementType::Unknown; +lgc::xdl::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(SPIRVType *const elemType) { + auto basicTy = lgc::xdl::CooperativeMatrixElementType::Unknown; if (elemType->isTypeInt(8)) { - basicTy = lgc::CooperativeMatrixElementType::Int8; + basicTy = lgc::xdl::CooperativeMatrixElementType::Int8; } else if (elemType->isTypeInt(16)) { - basicTy = lgc::CooperativeMatrixElementType::Int16; + basicTy = lgc::xdl::CooperativeMatrixElementType::Int16; } else if (elemType->isTypeInt(32)) { - basicTy = lgc::CooperativeMatrixElementType::Int32; + basicTy = lgc::xdl::CooperativeMatrixElementType::Int32; } else if (elemType->isTypeFloat(32)) { - basicTy = lgc::CooperativeMatrixElementType::Float32; + basicTy = lgc::xdl::CooperativeMatrixElementType::Float32; } else if (elemType->isTypeFloat(16)) { - basicTy = lgc::CooperativeMatrixElementType::Float16; + basicTy = lgc::xdl::CooperativeMatrixElementType::Float16; } else { llvm_unreachable("The element type is not supported!"); } @@ -5458,27 +5396,28 @@ lgc::CooperativeMatrixElementType SPIRVToLLVM::mapToBasicType(SPIRVType *const e // @param elemType : The type for the CooperativeMatrix element. // @param rows: The size of the row for the CooperativeMatrix. // @param columns: The size of the column for the CooperativeMatrix. 
-lgc::CooperativeMatrixLayout SPIRVToLLVM::getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, - lgc::CooperativeMatrixElementType elemType, - unsigned rows, unsigned columns) { +lgc::xdl::CooperativeMatrixLayout +SPIRVToLLVM::getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, lgc::xdl::CooperativeMatrixElementType elemType, + unsigned rows, unsigned columns) { [[maybe_unused]] const Vkgc::GfxIpVersion gfxIp = getPipelineContext()->getGfxIpVersion(); if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAKHR || use == CooperativeMatrixUse::CooperativeMatrixUseMatrixBKHR) { - return lgc::CooperativeMatrixLayout::FactorMatrixLayout; + return lgc::xdl::CooperativeMatrixLayout::FactorMatrixLayout; } if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAccumulatorKHR) { if (gfxIp.major == 11) - return lgc::CooperativeMatrixLayout::AccumulatorMatrixLayout; - if (BuilderCommon::isTypeNCooperativeMatrix(elemType, 32)) - return lgc::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; - if (elemType == lgc::CooperativeMatrixElementType::Int16 || elemType == lgc::CooperativeMatrixElementType::Float16) - return lgc::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout; + return lgc::xdl::CooperativeMatrixLayout::AccumulatorMatrixLayout; + if (lgc::xdl::isTypeNCooperativeMatrix(elemType, 32)) + return lgc::xdl::CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout; + if (elemType == lgc::xdl::CooperativeMatrixElementType::Int16 || + elemType == lgc::xdl::CooperativeMatrixElementType::Float16) + return lgc::xdl::CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout; llvm_unreachable("Invalid element type!"); - return lgc::CooperativeMatrixLayout::InvalidLayout; + return lgc::xdl::CooperativeMatrixLayout::InvalidLayout; } llvm_unreachable("The element type is not supported!"); - return lgc::CooperativeMatrixLayout::InvalidLayout; + return lgc::xdl::CooperativeMatrixLayout::InvalidLayout; } // 
===================================================================================================================== @@ -5493,7 +5432,63 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetCooperativeMatrixKHRColumns(); auto layout = getCooperativeMatrixKHRLayout(matrixUse, elemType, rows, columns); const unsigned kSize = rows > columns ? rows : columns; - return getBuilder()->create(layout, kSize); + return getBuilder()->create(layout, kSize); +} + +// ===================================================================================================================== +// Check if load/store is volatile. +// @param pointer : Pointer of the memory access. +// @param access : SPIR-V memory access instruction. +// @param isLoad : TRUE for load, FALSE for store. +bool SPIRVToLLVM::checkVolatile(SPIRVValue *pointer, SPIRVMemoryAccess *access, bool isLoad) { + assert(pointer && access); + + // We don't require volatile on address spaces that become non-pointers. + switch (pointer->getType()->getPointerStorageClass()) { + case StorageClassInput: + case StorageClassOutput: + case StorageClassPrivate: + case StorageClassFunction: + return false; + default: + break; + } + + if (access->SPIRVMemoryAccess::isVolatile(isLoad)) + return true; + + const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; + return (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) && pointer->isVolatile(); +} + +// ===================================================================================================================== +// Check if load/store is coherent. +// @param pointer : Pointer of the memory access. +// @param access : SPIR-V memory access instruction. +// @param isLoad : TRUE for load, FALSE for store. 
+bool SPIRVToLLVM::checkCoherent(SPIRVValue *pointer, SPIRVMemoryAccess *access, bool isLoad) { + assert(pointer && access); + + if (pointer->isCoherent()) + return true; + + spv::MemoryAccessMask memAccessMask = (spv::MemoryAccessMask)access->getMemoryAccessMask(isLoad); + if (memAccessMask & MemoryAccessNonPrivatePointerKHRMask) + return true; + + SPIRVWord spvId = SPIRVID_INVALID; + if (isLoad && (memAccessMask & MemoryAccessMakePointerVisibleKHRMask)) + spvId = access->getMakeVisibleScope(isLoad); + else if (!isLoad && (memAccessMask & MemoryAccessMakePointerAvailableKHRMask)) + spvId = access->getMakeAvailableScope(isLoad); + + if (spvId != SPIRVID_INVALID) { + SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); + const unsigned scope = spvScope->getZExtIntValue(); + return (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); + } + + return false; } // ===================================================================================================================== @@ -5508,6 +5503,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode Value *pointer = transValue(coopMatLoad->getSrc(), fn, bb); Value *arrayStride = transValue(coopMatLoad->getStride(), fn, bb); Value *colMajor = transValue(coopMatLoad->getColMajor(), fn, bb); + SPIRVType *elemSpvType = coopMatLoad->getType()->getCooperativeMatrixKHRComponentType(); // The lgc operation expects the stride to be in bytes. 
auto pointeeSize = m_m->getDataLayout().getTypeStoreSize(getPointeeType(coopMatLoad->getSrc())).getFixedValue(); @@ -5515,58 +5511,28 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode Value *stride = getBuilder()->CreateMul(arrayStride, getBuilder()->getInt32(pointeeSize)); - // Calc memoryAccess - unsigned memoryAccess = CooperativeMatrixMemoryAccessNone; - // Calc isVolatile - bool isVolatile = coopMatLoad->SPIRVMemoryAccess::isVolatile(true); - const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; - if (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) - isVolatile |= coopMatLoad->getSrc()->isVolatile(); - // We don't require volatile on address spaces that become non-pointers. - switch (coopMatLoad->getSrc()->getType()->getPointerStorageClass()) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isVolatile = false; - break; - default: - break; - } - - // Calc isCoherent - bool isCoherent = coopMatLoad->getSrc()->isCoherent(); - // MakePointerVisibleKHR is valid with OpCooperativeMatrixLoadKHR - if (coopMatLoad->getMemoryAccessMask(true) & MemoryAccessMakePointerVisibleKHRMask) { - SPIRVWord spvId = coopMatLoad->getMakeVisibleScope(true); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - if (isSystemScope) - isCoherent = true; - } - if (coopMatLoad->getMemoryAccessMask(true) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; - - // Calc isNonTempal + // Calc volatile/coherent/isNonTempal + const bool isVolatile = checkVolatile(coopMatLoad->getSrc(), coopMatLoad, true); + const bool isCoherent = checkCoherent(coopMatLoad->getSrc(), coopMatLoad, true); const bool isNonTemporal = coopMatLoad->SPIRVMemoryAccess::isNonTemporal(true); + + auto memoryAccess = 
static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessMaskNone); if (isVolatile) { - memoryAccess |= CooperativeMatrixMemoryAccessVolatile; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); } if (isCoherent) { - memoryAccess |= CooperativeMatrixMemoryAccessCoherent; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); } if (isNonTemporal) { - memoryAccess |= CooperativeMatrixMemoryAccessTemporal; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); } bool isColMajor = cast(colMajor)->getZExtValue(); // Cal elemType - SPIRVType *elemSpvType = coopMatLoad->getType()->getCooperativeMatrixKHRComponentType(); CooperativeMatrixUse use = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRUse()); unsigned rows = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRRows()); unsigned columns = static_cast(coopMatLoad->getType()->getCooperativeMatrixKHRColumns()); - lgc::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); + lgc::xdl::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); if (use == CooperativeMatrixUse::CooperativeMatrixUseMatrixAKHR) { // Layout A is the transposition of the layout B, col_major_A = row_majow_B. // FactorMatrixLayout is for B, so it needs inverse the layout when use is A. @@ -5576,15 +5542,15 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode // must be aligned to at least the lesser of 16 bytes or the natural alignment of a row or column // (depending on ColumnMajor) of the matrix (where the natural alignment is the number of columns/rows multiplied // by the component size). 
- Type *elementllType = getBuilder()->transCooperativeMatrixElementType(elemType); + Type *elementllType = lgc::xdl::transCooperativeMatrixElementType(*getBuilder(), elemType); unsigned elementSize = static_cast(m_m->getDataLayout().getTypeSizeInBits(elementllType) / 8); elementSize = std::max(elementSize, (unsigned)1); unsigned alignmentInRowCol = (isColMajor ? rows : columns) * elementSize; unsigned loadAlignment = std::min((unsigned)16, alignmentInRowCol); - lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(use, elemType, rows, columns); + lgc::xdl::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout(use, elemType, rows, columns); const unsigned kSize = rows > columns ? rows : columns; - Type *coopMatrixTy = getBuilder()->getCooperativeMatrixTy(elemType, layout, kSize); - auto CoopMatLoadInst = getBuilder()->create( + Type *coopMatrixTy = lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, layout, kSize); + auto CoopMatLoadInst = getBuilder()->create( coopMatrixTy, pointer, stride, isColMajor, elemType, layout, memoryAccess, loadAlignment, kSize, "load"); return CoopMatLoadInst; } @@ -5606,51 +5572,21 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetDataLayout().getTypeStoreSize(getPointeeType(coopMatStore->getDest())).getFixedValue(); assert(pointeeSize != 0 && "OpCooperativeMatrixStoreKHR pointee must be a scalar or vector"); + SPIRVType *elemSpvType = coopMatStore->getObject()->getType()->getCooperativeMatrixKHRComponentType(); Value *stride = getBuilder()->CreateMul(arrayStride, getBuilder()->getInt32(pointeeSize)); - // Calc isVolatile - bool isVolatile = coopMatStore->SPIRVMemoryAccess::isVolatile(false); - const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; - if (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) - isVolatile |= coopMatStore->getDest()->isVolatile(); - // We don't require volatile on address spaces that become non-pointers. 
- const auto pointerStorageClass = coopMatStore->getDest()->getType()->getPointerStorageClass(); - switch (pointerStorageClass) { - case StorageClassInput: - case StorageClassOutput: - case StorageClassPrivate: - case StorageClassFunction: - isVolatile = false; - break; - default: - break; - } - - // Calc isCoherent - bool isCoherent = coopMatStore->getDest()->isCoherent(); - // MakePointerAvailableKHR is valid with OpStore - if (coopMatStore->getMemoryAccessMask(false) & MemoryAccessMakePointerAvailableKHRMask) { - SPIRVWord spvId = coopMatStore->getMakeAvailableScope(false); - SPIRVConstant *const spvScope = static_cast(m_bm->getValue(spvId)); - const unsigned scope = spvScope->getZExtIntValue(); - const bool isSystemScope = (scope <= ScopeDevice || scope == ScopeQueueFamilyKHR); - if (isSystemScope) - isCoherent = true; - } - if (coopMatStore->getMemoryAccessMask(false) & MemoryAccessNonPrivatePointerKHRMask) - isCoherent = true; - - // Calc isNonTempal + // Calc volatile/coherent/isNonTempal + const bool isVolatile = checkVolatile(coopMatStore->getDest(), coopMatStore, false); + const bool isCoherent = checkCoherent(coopMatStore->getDest(), coopMatStore, false); const bool isNonTemporal = coopMatStore->SPIRVMemoryAccess::isNonTemporal(false); // Calc colMajor bool isColMajor = cast(colMajor)->getZExtValue(); // Cal elemType - Type *const elemltType = transType(coopMatStore->getObject()->getType()->getCooperativeMatrixKHRComponentType()); - lgc::CooperativeMatrixElementType elemType = - mapToBasicType(coopMatStore->getObject()->getType()->getCooperativeMatrixKHRComponentType()); + Type *const elemltType = transType(elemSpvType); + lgc::xdl::CooperativeMatrixElementType elemType = mapToBasicType(elemSpvType); CooperativeMatrixUse use = static_cast(coopMatStore->getObject()->getType()->getCooperativeMatrixKHRUse()); @@ -5662,16 +5598,16 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessMaskNone); if 
(isVolatile) { - memoryAccess |= CooperativeMatrixMemoryAccessVolatile; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessVolatileMask); } if (isCoherent) { - memoryAccess |= CooperativeMatrixMemoryAccessCoherent; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessCoherentMask); } if (isNonTemporal) { - memoryAccess |= CooperativeMatrixMemoryAccessTemporal; + memoryAccess |= static_cast(lgc::xdl::CooperativeMatrixMemoryAccess::MemoryAccessTemporalMask); } // For OpCooperativeMatrixLoadKHR and OpCooperativeMatrixStoreKHR instructions, the Pointer and Stride operands @@ -5683,8 +5619,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode columns ? rows : columns; - getBuilder()->create(pointer, stride, isColMajor, elemType, layout, memoryAccess, - storeAlignment, matrix, kSize); + getBuilder()->create(pointer, stride, isColMajor, elemType, layout, memoryAccess, + storeAlignment, matrix, kSize); return nullptr; } @@ -5703,8 +5639,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetType()->getCooperativeMatrixKHRComponentType(); SPIRVType *elemTypeC = spvOperands[2]->getType()->getCooperativeMatrixKHRComponentType(); - lgc::CooperativeMatrixElementType elemBasicTypeA = mapToBasicType(elemTypeA); - lgc::CooperativeMatrixElementType elemBasicTypeC = mapToBasicType(elemTypeC); + lgc::xdl::CooperativeMatrixElementType elemBasicTypeA = mapToBasicType(elemTypeA); + lgc::xdl::CooperativeMatrixElementType elemBasicTypeC = mapToBasicType(elemTypeC); bool isSignedA = static_cast(static_cast(spvInst)->getMatrixASigned()); bool isSignedB = static_cast(static_cast(spvInst)->getMatrixBSigned()); @@ -5714,8 +5650,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcodegetType(); - lgc::CooperativeMatrixElementType elemBasicTypeD = elemBasicTypeC; - Value *coopMatrixD = getBuilder()->create( + lgc::xdl::CooperativeMatrixElementType elemBasicTypeD = elemBasicTypeC; + Value *coopMatrixD = getBuilder()->create( 
coopMatrixDType, coopMatrixA, coopMatrixB, coopMatrixC, isSignedA, isSignedB, isSat, 0, elemBasicTypeA, elemBasicTypeA, elemBasicTypeC, elemBasicTypeD, kMultiplier, "mulAdd"); return coopMatrixD; @@ -6075,12 +6011,14 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu if (auto vecTy = dyn_cast(cond->getType())) { unsigned numComponents = vecTy->getNumElements(); result = PoisonValue::get(lhs->getType()); - for (unsigned i = 0; i != numComponents; ++i) { - Value *compCond = getBuilder()->CreateExtractElement(cond, i); - Value *compLhs = getBuilder()->CreateExtractElement(lhs, i); - Value *compRhs = getBuilder()->CreateExtractElement(rhs, i); - Value *compResult = getBuilder()->CreateSelect(compCond, compLhs, compRhs); - result = getBuilder()->CreateInsertElement(result, compResult, i); + { + for (unsigned i = 0; i != numComponents; ++i) { + Value *compCond = getBuilder()->CreateExtractElement(cond, i); + Value *compLhs = getBuilder()->CreateExtractElement(lhs, i); + Value *compRhs = getBuilder()->CreateExtractElement(rhs, i); + Value *compResult = getBuilder()->CreateSelect(compCond, compLhs, compRhs); + result = getBuilder()->CreateInsertElement(result, compResult, i); + } } } else { result = getBuilder()->CreateSelect(cond, lhs, rhs); @@ -6204,24 +6142,7 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu switch (bv->getType()->getOpCode()) { case OpTypeVector: { auto vecTy = transType(cc->getType()); - Value *v = PoisonValue::get(vecTy); - for (unsigned idx = 0, i = 0, e = constituents.size(); i < e; ++i) { - if (constituents[i]->getType()->isVectorTy()) { - // NOTE: It is allowed to construct a vector from several "smaller" - // scalars or vectors, such as vec4 = (vec2, vec2) or vec4 = (float, - // vec3). 
- auto compCount = cast(constituents[i]->getType())->getNumElements(); - for (unsigned j = 0; j < compCount; ++j) { - auto comp = ExtractElementInst::Create(constituents[i], ConstantInt::get(*m_context, APInt(32, j)), "", bb); - v = InsertElementInst::Create(v, comp, ConstantInt::get(*m_context, APInt(32, idx)), "", bb); - ++idx; - } - } else { - v = InsertElementInst::Create(v, constituents[i], ConstantInt::get(*m_context, APInt(32, idx)), "", bb); - ++idx; - } - } - return mapValue(bv, v); + return mapValue(bv, vectorCompositeConstruct(vecTy, constituents, getBuilder())); } case OpTypeArray: case OpTypeStruct: { @@ -6304,11 +6225,12 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu case OpCompositeExtract: { SPIRVCompositeExtract *ce = static_cast(bv); - if (ce->getComposite()->getType()->isTypeVector()) { + auto cv = transValue(ce->getComposite(), f, bb); + const bool isVectorTy = ce->getComposite()->getType()->isTypeVector(); + if (isVectorTy) { assert(ce->getIndices().size() == 1 && "Invalid index"); - return mapValue(bv, ExtractElementInst::Create(transValue(ce->getComposite(), f, bb), - ConstantInt::get(*m_context, APInt(32, ce->getIndices()[0])), - bv->getName(), bb)); + auto index = ce->getIndices()[0]; + return mapValue(bv, getBuilder()->CreateExtractElement(cv, index)); } if (ce->getComposite()->getType()->isTypeCooperativeMatrixKHR()) { assert(ce->getIndices().size() == 1 && "Invalid index"); // Treating it as vector. 
@@ -6319,14 +6241,13 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu unsigned columns = matrixType->getCooperativeMatrixKHRColumns(); auto layout = getCooperativeMatrixKHRLayout( static_cast(matrixType->getCooperativeMatrixKHRUse()), elemType, rows, columns); - Type *extractElementType = getBuilder()->transCooperativeMatrixElementType(elemType); + Type *extractElementType = lgc::xdl::transCooperativeMatrixElementType(*getBuilder(), elemType); Value *matrix = transValue(ce->getComposite(), f, bb); Value *index = getBuilder()->getInt32(ce->getIndices()[0]); - return mapValue( - bv, getBuilder()->create(extractElementType, matrix, index, elemType, layout)); + return mapValue(bv, getBuilder()->create(extractElementType, matrix, index, + elemType, layout)); } - auto cv = transValue(ce->getComposite(), f, bb); auto indexedTy = ExtractValueInst::getIndexedType(cv->getType(), ce->getIndices()); if (!indexedTy) { // NOTE: "OpCompositeExtract" could extract a scalar component from a @@ -6348,17 +6269,22 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu case OpVectorExtractDynamic: { auto ce = static_cast(bv); - return mapValue(bv, ExtractElementInst::Create(transValue(ce->getVector(), f, bb), - transValue(ce->getIndex(), f, bb), bv->getName(), bb)); + auto result = transValue(ce->getVector(), f, bb); + auto index = transValue(ce->getIndex(), f, bb); + return mapValue(bv, ExtractElementInst::Create(result, index, bv->getName(), bb)); } case OpCompositeInsert: { auto ci = static_cast(bv); - if (ci->getComposite()->getType()->isTypeVector()) { + auto ciCompositeTy = ci->getComposite()->getType(); + const bool isVectorTy = ciCompositeTy->isTypeVector(); + [[maybe_unused]] SPIRVType *(SPIRVType::*getVectorElemType)() const = &SPIRVType::getVectorComponentType; + if (isVectorTy) { assert(ci->getIndices().size() == 1 && "Invalid index"); - return mapValue(bv, InsertElementInst::Create( - transValue(ci->getComposite(), f, bb), 
transValue(ci->getObject(), f, bb), - ConstantInt::get(*m_context, APInt(32, ci->getIndices()[0])), bv->getName(), bb)); + auto result = transValue(ci->getComposite(), f, bb); + auto object = transValue(ci->getObject(), f, bb); + auto index = ci->getIndices()[0]; + return mapValue(bv, getBuilder()->CreateInsertElement(result, object, index)); } if (ci->getComposite()->getType()->isTypeCooperativeMatrixKHR()) { @@ -6374,8 +6300,8 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu Value *matrix = transValue(ci->getComposite(), f, bb); Value *value = transValue(ci->getObject(), f, bb); Value *index = getBuilder()->getInt32(ci->getIndices()[0]); - return mapValue(bv, getBuilder()->create(matrix->getType(), matrix, value, index, - elemType, layout)); + return mapValue(bv, getBuilder()->create(matrix->getType(), matrix, value, + index, elemType, layout)); } auto cv = transValue(ci->getComposite(), f, bb); @@ -6402,9 +6328,10 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu case OpVectorInsertDynamic: { auto ci = static_cast(bv); - return mapValue(bv, - InsertElementInst::Create(transValue(ci->getVector(), f, bb), transValue(ci->getComponent(), f, bb), - transValue(ci->getIndex(), f, bb), bv->getName(), bb)); + auto result = transValue(ci->getVector(), f, bb); + auto component = transValue(ci->getComponent(), f, bb); + auto index = transValue(ci->getIndex(), f, bb); + return mapValue(bv, getBuilder()->CreateInsertElement(result, component, index, bv->getName())); } case OpVectorShuffle: { @@ -7640,6 +7567,10 @@ void SPIRVToLLVM::getImageDesc(SPIRVValue *bImageInst, ExtractedImageInfo *info) info->convertingSamplerIdx = getBuilder()->CreateExtractValue(image, idxs.convertingSamplerIdx); } + if (getPipelineOptions()->getGlState().enableDepthCompareParam) { + info->compareParamPointer = getBuilder()->CreateExtractValue(image, idxs.compareParamPointer); + } + // Analyze the data flow for coheren/volatile/(non-)uniformness. 
bool forceNonUniform = isShaderStageInMask(convertToShaderStage(m_execModule), getPipelineOptions()->forceNonUniformResourceIndexStageMask); @@ -8202,6 +8133,7 @@ Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlo Value *addr[lgc::Builder::ImageAddressCount] = {}; addr[lgc::Builder::ImageAddressIdxCoordinate] = transValue(bii->getOpValue(opndIdx++), bb->getParent(), bb); + bool hasDref = false; switch (unsigned(bii->getOpCode())) { case OpImageSampleDrefImplicitLod: case OpImageSampleDrefExplicitLod: @@ -8213,6 +8145,7 @@ Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlo case OpImageSparseSampleProjDrefExplicitLod: // This instruction has a dref operand. addr[lgc::Builder::ImageAddressIdxZCompare] = transValue(bii->getOpValue(opndIdx++), bb->getParent(), bb); + hasDref = true; break; default: break; @@ -8241,6 +8174,24 @@ Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlo Value *result = getBuilder()->CreateImageSample(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, imageInfo.samplerPointer, addr); + if (getPipelineOptions()->getGlState().enableDepthCompareParam && hasDref) { + Value *compareParamPtr = imageInfo.compareParamPointer; + // compareParam descriptor comes from OGLP struct CompareParamDescriptor + // DW1: depthComparModeParam { uint32_t compareMode : 1; } + auto depthComparModeParamPtr = + getBuilder()->CreateInBoundsGEP(getBuilder()->getInt32Ty(), compareParamPtr, getBuilder()->getInt32(0)); + Value *depthComparModeParam = + getBuilder()->CreateAlignedLoad(getBuilder()->getInt32Ty(), depthComparModeParamPtr, Align(4)); + Value *compareMode = getBuilder()->CreateAnd(depthComparModeParam, getBuilder()->getInt32(0x1)); + Value *isZero = getBuilder()->CreateICmpEQ(compareMode, getBuilder()->getInt32(0)); + + addr[lgc::Builder::ImageAddressIdxZCompare] = nullptr; + Value *noZCompareInst = getBuilder()->CreateImageSample(resultTy, imageInfo.dim, 
imageInfo.flags, + imageInfo.imagePointer, imageInfo.samplerPointer, addr); + + result = getBuilder()->CreateSelect(isZero, noZCompareInst, result); + } + if (!m_convertingSamplers.empty()) { Value *planes = PoisonValue::get(ArrayType::get(getBuilder()->getDescPtrTy(), 3)); for (unsigned i = 0; i < 3; ++i) { @@ -11346,63 +11297,63 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlock *bb) { auto oc = spvVal->getOpCode(); Function *func = bb->getParent(); - CooperativeMatrixArithOp arithOp; + lgc::xdl::CooperativeMatrixArithOp arithOp; switch (oc) { case OpFNegate: - arithOp = CooperativeMatrixArithOp::FSub; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FSub; break; case OpSNegate: - arithOp = CooperativeMatrixArithOp::ISub; + arithOp = lgc::xdl::CooperativeMatrixArithOp::ISub; break; case OpFAdd: - arithOp = CooperativeMatrixArithOp::FAdd; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FAdd; break; case OpIAdd: - arithOp = CooperativeMatrixArithOp::IAdd; + arithOp = lgc::xdl::CooperativeMatrixArithOp::IAdd; break; case OpISub: - arithOp = CooperativeMatrixArithOp::ISub; + arithOp = lgc::xdl::CooperativeMatrixArithOp::ISub; break; case OpFSub: - arithOp = CooperativeMatrixArithOp::FSub; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FSub; break; case OpIMul: - arithOp = CooperativeMatrixArithOp::IMul; + arithOp = lgc::xdl::CooperativeMatrixArithOp::IMul; break; case OpFMul: - arithOp = CooperativeMatrixArithOp::FMul; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FMul; break; case OpFDiv: - arithOp = CooperativeMatrixArithOp::FDiv; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FDiv; break; case OpSDiv: - arithOp = CooperativeMatrixArithOp::SDiv; + arithOp = lgc::xdl::CooperativeMatrixArithOp::SDiv; break; case OpUDiv: - arithOp = CooperativeMatrixArithOp::UDiv; + arithOp = lgc::xdl::CooperativeMatrixArithOp::UDiv; break; case OpFMod: - arithOp = 
CooperativeMatrixArithOp::FMod; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FMod; break; case OpSMod: - arithOp = CooperativeMatrixArithOp::SMod; + arithOp = lgc::xdl::CooperativeMatrixArithOp::SMod; break; case OpUMod: - arithOp = CooperativeMatrixArithOp::UMod; + arithOp = lgc::xdl::CooperativeMatrixArithOp::UMod; break; case OpSRem: - arithOp = CooperativeMatrixArithOp::SRem; + arithOp = lgc::xdl::CooperativeMatrixArithOp::SRem; break; case OpFRem: - arithOp = CooperativeMatrixArithOp::FRem; + arithOp = lgc::xdl::CooperativeMatrixArithOp::FRem; break; default: llvm_unreachable("Not support arithmetic for cooperative matrix"); return nullptr; } - lgc::CooperativeMatrixLayout layout = lgc::CooperativeMatrixLayout::InvalidLayout; - lgc::CooperativeMatrixElementType elemType = lgc::CooperativeMatrixElementType::Unknown; + auto layout = lgc::xdl::CooperativeMatrixLayout::InvalidLayout; + auto elemType = lgc::xdl::CooperativeMatrixElementType::Unknown; unsigned kSize = 16; if (oc == OpFNegate || oc == OpSNegate) { auto unary = static_cast(spvVal); @@ -11417,9 +11368,9 @@ Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlo static_cast(unary->getOperand(0)->getType()->getCooperativeMatrixKHRUse()), elemType, rows, columns); } - Type *resultTy = getBuilder()->getCooperativeMatrixTy(elemType, layout, kSize); - return getBuilder()->create(resultTy, arithOp, Constant::getNullValue(srcVal->getType()), - srcVal, elemType, layout); + Type *resultTy = lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, layout, kSize); + return getBuilder()->create( + resultTy, arithOp, Constant::getNullValue(srcVal->getType()), srcVal, elemType, layout); } else { auto binary = static_cast(spvVal); Value *lhs = transValue(binary->getOperand(0), func, bb); @@ -11434,8 +11385,8 @@ Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlo static_cast(binary->getOperand(0)->getType()->getCooperativeMatrixKHRUse()), elemType, rows, 
columns); } - Type *resultTy = getBuilder()->getCooperativeMatrixTy(elemType, layout, kSize); - return getBuilder()->create(resultTy, arithOp, lhs, rhs, elemType, layout); + Type *resultTy = lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, layout, kSize); + return getBuilder()->create(resultTy, arithOp, lhs, rhs, elemType, layout); } } @@ -11443,14 +11394,16 @@ Value *SPIRVToLLVM::transCooperativeMatrixArithInst(SPIRVValue *spvVal, BasicBlo // Translate cooperative matrix construction instructions to LLVM IR Value *SPIRVToLLVM::transCooperativeMatrixKHRFromConstruct(SPIRVType *spvCoopMatTy, const std::vector &constituents) { - lgc::CooperativeMatrixElementType elemType = mapToBasicType(spvCoopMatTy->getCooperativeMatrixKHRComponentType()); + lgc::xdl::CooperativeMatrixElementType elemType = + mapToBasicType(spvCoopMatTy->getCooperativeMatrixKHRComponentType()); unsigned rows = spvCoopMatTy->getCooperativeMatrixKHRRows(); unsigned columns = spvCoopMatTy->getCooperativeMatrixKHRColumns(); const unsigned kSize = rows > columns ? 
rows : columns; - lgc::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( + lgc::xdl::CooperativeMatrixLayout layout = getCooperativeMatrixKHRLayout( static_cast(spvCoopMatTy->getCooperativeMatrixKHRUse()), elemType, rows, columns); - Type *coopMatrixTy = getBuilder()->getCooperativeMatrixTy(elemType, layout, kSize); - return getBuilder()->create(coopMatrixTy, constituents[0], elemType, layout, kSize); + Type *coopMatrixTy = lgc::xdl::getCooperativeMatrixTy(*getBuilder(), elemType, layout, kSize); + return getBuilder()->create(coopMatrixTy, constituents[0], elemType, layout, + kSize); } } // namespace SPIRV diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index 42dcd27431..40f504df17 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -46,6 +46,7 @@ #include "vkgcDefs.h" #include "compilerutils/LoweringPointerTupleMap.h" #include "lgc/Builder.h" +#include "lgc/LgcXdlTypes.h" namespace llvm { class Module; @@ -97,6 +98,7 @@ struct ImageTypeIndices { unsigned samplerPointer = ~0; unsigned samplerStride = ~0; unsigned convertingSamplerIdx = ~0; + unsigned compareParamPointer = ~0; }; class SPIRVToLLVM { @@ -181,6 +183,7 @@ class SPIRVToLLVM { Value *fmaskPointer = nullptr; Value *samplerPointer = nullptr; Value *convertingSamplerIdx = nullptr; + Value *compareParamPointer = nullptr; }; // Load image and/or sampler descriptors, and get information from the image @@ -269,7 +272,7 @@ class SPIRVToLLVM { }; typedef DenseMap SPIRVToLLVMFullTypeMap; - typedef CompilerUtils::LoweringPointerTupleMap SPIRVToLLVMValueMap; + typedef compilerutils::LoweringPointerTupleMap SPIRVToLLVMValueMap; typedef DenseMap SPIRVBlockToLLVMStructMap; typedef DenseMap SPIRVToLLVMFunctionMap; typedef DenseMap BuiltinVarMap; @@ -337,17 +340,10 @@ class SPIRVToLLVM { SmallVector llvmInstructions; }; - lgc::CooperativeMatrixElementType mapToBasicType(SPIRVType *const spvType); - lgc::CooperativeMatrixLayout getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, - lgc::CooperativeMatrixElementType elemTy, unsigned rows, - unsigned columns); - - enum CooperativeMatrixMemoryAccess { - CooperativeMatrixMemoryAccessNone = 0x00, - CooperativeMatrixMemoryAccessVolatile = 0x01, - CooperativeMatrixMemoryAccessCoherent = 0x02, - CooperativeMatrixMemoryAccessTemporal = 0x04, - }; + lgc::xdl::CooperativeMatrixElementType mapToBasicType(SPIRVType *const spvType); + lgc::xdl::CooperativeMatrixLayout getCooperativeMatrixKHRLayout(CooperativeMatrixUse use, + lgc::xdl::CooperativeMatrixElementType elemTy, + unsigned rows, unsigned columns); Value *transCooperativeMatrixArithInst(SPIRVValue 
*spvVal, BasicBlock *bb); Value *transCooperativeMatrixKHRFromConstruct(SPIRVType *spvCoopMatRowTy, const std::vector &constituents); @@ -358,6 +354,8 @@ class SPIRVToLLVM { // input for another load, we are using a vector here, which contains a pointer to the instruction along with it being // either a load or store. MapVector> m_spirvMemopToLlvmMemopMapping; + bool checkVolatile(SPIRVValue *pointer, SPIRVMemoryAccess *access, bool isLoad); + bool checkCoherent(SPIRVValue *pointer, SPIRVMemoryAccess *access, bool isLoad); lgc::Builder *getBuilder() const { return m_builder; } diff --git a/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp b/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp index caba0d13bb..493f7be341 100644 --- a/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVToLLVMDbgTran.cpp @@ -46,6 +46,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include using namespace std; using namespace SPIRVDebug::Operand; @@ -254,7 +255,11 @@ DIType *SPIRVToLLVMDbgTran::transTypePointer(const SPIRVExtInst *DebugInst) { Ty = Builder.createPointerType(PointeeTy, BM->getAddressingModel() * 32, 0, AS); if (Flags & SPIRVDebug::FlagIsObjectPointer) +#if !LLVM_MAIN_REVISION || LLVM_MAIN_REVISION >= 524303 + Ty = Builder.createObjectPointerType(Ty, /*Implicit=*/true); +#else Ty = Builder.createObjectPointerType(Ty); +#endif else if (Flags & SPIRVDebug::FlagIsArtificial) Ty = Builder.createArtificialType(Ty); @@ -902,22 +907,14 @@ Instruction *SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugIn // If new Debug Info Format is turned OFF then 'insertDeclare' will return Instruction (Intrinsic) which we are // storing in hashMap. This part will be removed after the transition, since new DbgInfoFormat will be turned ON // always and we will return nullptr from that point. This comment applies also to 'insertDbgValueIntrinsic' below. 
- if (DbgInst.is()) { - return DbgInst.get(); - } else { - return nullptr; - } + return dyn_cast_or_null(DbgInst); } case SPIRVDebug::Value: { using namespace SPIRVDebug::Operand::DebugValue; auto LocalVar = GetLocalVar(Ops[DebugLocalVarIdx]); LLPCDbgInstPtr DbgInst = Builder.insertDbgValueIntrinsic(GetValue(Ops[ValueIdx]), LocalVar.first, GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB); - if (DbgInst.is()) { - return DbgInst.get(); - } else { - return nullptr; - } + return dyn_cast_or_null(DbgInst); } default: llvm_unreachable("Unknown debug intrinsic!"); diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRV.debug.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRV.debug.h index e0f3676091..431a6b98a4 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRV.debug.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRV.debug.h @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + #ifndef SPIRV_DEBUG_H #define SPIRV_DEBUG_H #include "SPIRVUtil.h" diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h index f47c49c3b8..c9fdd8f986 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. 
*/ + #ifndef SPIRV_LIBSPIRV_SPIRVERRORENUM_H #define SPIRV_LIBSPIRV_SPIRVERRORENUM_H diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 330cac14e6..e6659102c7 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -671,7 +671,7 @@ class SPIRVBinary : public SPIRVInstTemplateBase { void validate() const override { SPIRVId Op1 = Ops[0]; SPIRVId Op2 = Ops[1]; - SPIRVType *Op1Ty, *Op2Ty; + [[maybe_unused]] SPIRVType *Op1Ty, *Op2Ty; SPIRVInstruction::validate(); if (getValue(Op1)->isForward() || getValue(Op2)->isForward()) return; @@ -696,8 +696,6 @@ class SPIRVBinary : public SPIRVInstTemplateBase { } else { Op2Ty = getValueType(Op2); } - (void)Op1Ty; - (void)Op2Ty; if (isBinaryOpCode(OpCode)) { assert( (getValueType(Op1) == getValueType(Op2) || (Op1Ty->isTypeInt() && Op2Ty->isTypeInt(Op1Ty->getBitWidth()))) && @@ -1324,7 +1322,6 @@ class SPIRVVectorTimesScalar : public SPIRVInstruction { "Result type must be a vector of floating-point type"); assert(getValueType(Vector)->getVectorComponentType() == getValueType(getId())->getVectorComponentType() && "Scalar must have the same type as the Component Type in Result Type"); - SPIRVInstruction::validate(); } protected: @@ -1340,8 +1337,8 @@ class SPIRVUnary : public SPIRVInstTemplateBase { if (getValue(Op)->isForward()) return; if (isGenericNegateOpCode(OpCode)) { - SPIRVType *ResTy = nullptr; - SPIRVType *OpTy = nullptr; + [[maybe_unused]] SPIRVType *ResTy = nullptr; + [[maybe_unused]] SPIRVType *OpTy = nullptr; if (Type->isTypeCooperativeMatrixKHR() && (static_cast(OpCode) == OpSNegate || static_cast(OpCode) == OpFNegate)) { @@ -1351,8 +1348,7 @@ class SPIRVUnary : public SPIRVInstTemplateBase { ResTy = Type->isTypeVector() ? Type->getVectorComponentType() : Type; OpTy = Type->isTypeVector() ? 
getValueType(Op)->getVectorComponentType() : getValueType(Op); } - (void)ResTy; - (void)OpTy; + // NOTE: SPIR-V spec only request OpFNegate to match the type between Operand and Result. assert((getType() == getValueType(Op) || static_cast(OpCode) != OpFNegate) && "Inconsistent type"); assert((ResTy->isTypeInt() || ResTy->isTypeFloat()) && "Invalid type for Generic Negate instruction"); @@ -1361,8 +1357,9 @@ class SPIRVUnary : public SPIRVInstTemplateBase { : 1) && "Invalid vector component Width for Generic Negate instruction"); } - if (Type->isTypeCooperativeMatrixKHR() && static_cast(OpCode) >= OpConvertFToU && - static_cast(OpCode) <= OpFConvert) { + const bool allowedMatrixConversion = + (static_cast(OpCode) >= OpConvertFToU && static_cast(OpCode) <= OpFConvert); + if (Type->isTypeCooperativeMatrixKHR() && allowedMatrixConversion) { SPIRVType *OpTy = getValueType(Op); assert(OpTy->isTypeCooperativeMatrixKHR() && Type->getCooperativeMatrixKHRScope() == OpTy->getCooperativeMatrixKHRScope() && @@ -1759,9 +1756,11 @@ class SPIRVCompositeExtract : public SPIRVInstruction { // need to trace through the base type for struct types void validate() const override { SPIRVInstruction::validate(); - assert(getValueType(Composite)->isTypeArray() || getValueType(Composite)->isTypeStruct() || - getValueType(Composite)->isTypeVector() || getValueType(Composite)->isTypeMatrix() || - getValueType(Composite)->isTypeCooperativeMatrixKHR()); + [[maybe_unused]] const bool typeCheck = + getValueType(Composite)->isTypeArray() || getValueType(Composite)->isTypeStruct() || + getValueType(Composite)->isTypeVector() || getValueType(Composite)->isTypeMatrix() || + getValueType(Composite)->isTypeCooperativeMatrixKHR(); + assert(typeCheck); } SPIRVId Composite; std::vector Indices; @@ -1798,9 +1797,11 @@ class SPIRVCompositeInsert : public SPIRVInstruction { SPIRVInstruction::validate(); assert(OpCode == OC); assert(WordCount == Indices.size() + FixedWordCount); - 
assert(getValueType(Composite)->isTypeArray() || getValueType(Composite)->isTypeStruct() || - getValueType(Composite)->isTypeVector() || getValueType(Composite)->isTypeMatrix()); - assert(Type == getValueType(Composite)); + [[maybe_unused]] const bool typeCheck = + getValueType(Composite)->isTypeArray() || getValueType(Composite)->isTypeStruct() || + getValueType(Composite)->isTypeVector() || getValueType(Composite)->isTypeMatrix() || + getValueType(Composite)->isTypeCooperativeMatrixKHR(); + assert(typeCheck && Type == getValueType(Composite)); } SPIRVId Object; SPIRVId Composite; diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCode.h index 3bd057d2d3..ae047ac764 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -5,7 +5,7 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), @@ -60,7 +60,11 @@ inline bool isAtomicOpCode(Op OpCode) { (OpCode == OpAtomicFMaxEXT) || (OpCode == OpAtomicFAddEXT); } inline bool isBinaryOpCode(Op OpCode) { - return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) || OpCode == OpDot; + return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) || OpCode == OpDot +#if VKI_KHR_SHADER_FMA + || OpCode == OpFmaKHR +#endif + ; } inline bool isShiftOpCode(Op OpCode) { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h index d990ab4411..fcbfbc6547 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h @@ -1,3 +1,5 @@ +/* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. */ + _SPIRV_OP(Nop, 0) _SPIRV_OP(Undef, 1) _SPIRV_OP(SourceContinued, 2) diff --git a/llpc/unittests/CMakeLists.txt b/llpc/unittests/CMakeLists.txt index 4038cc2626..56215e93de 100644 --- a/llpc/unittests/CMakeLists.txt +++ b/llpc/unittests/CMakeLists.txt @@ -57,7 +57,7 @@ function(add_llpc_unittest_impl test_suite test_name) target_include_directories(${test_name} PRIVATE ${LLVM_INCLUDE_DIRS} # This is necessary to discover the auto-generated llvm-config.h header. 
) - set_compiler_options(${test_name} ${LLPC_ENABLE_WERROR}) + set_compiler_options(${test_name}) get_target_property(test_suite_folder ${test_suite} FOLDER) if(test_suite_folder) diff --git a/llpc/unittests/lit.site.cfg.py.in b/llpc/unittests/lit.site.cfg.py.in index a089b64983..f58ae2cd45 100644 --- a/llpc/unittests/lit.site.cfg.py.in +++ b/llpc/unittests/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys diff --git a/llvmraytracing/CMakeLists.txt b/llvmraytracing/CMakeLists.txt index 932fd99bb6..9eae74422b 100644 --- a/llvmraytracing/CMakeLists.txt +++ b/llvmraytracing/CMakeLists.txt @@ -38,7 +38,7 @@ endfunction() option(LLVMRAYTRACING_BUILD_TESTS "Build raytracing tests") -add_llvm_library(LLVMRaytracing +set(LLVMRaytracingLibFiles lib/CleanupContinuations.cpp lib/ContStateBuilder.cpp lib/Continuations.cpp @@ -63,7 +63,10 @@ add_llvm_library(LLVMRaytracing lib/PayloadAccessQualifiers.cpp lib/RematSupport.cpp lib/RemoveTypesMetadata.cpp - lib/SpecializeDriverShaders.cpp + lib/SpecializeDriverShaders.cpp) + +add_llvm_library(LLVMRaytracing + ${LLVMRaytracingLibFiles} DEPENDS intrinsics_gen @@ -91,12 +94,8 @@ target_link_libraries(LLVMRaytracing PUBLIC llvm_dialects ${extra_llvm_libs} llp set_compiler_options(LLVMRaytracing) # TableGen for dialects -if (EXISTS ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) - set(RAYTRACING_TABLEGEN_EXE ${LLVM_TOOLS_BINARY_PATH}/llvm-dialects-tblgen) -else() - set(RAYTRACING_TABLEGEN_EXE $) -endif() -set(RAYTRACING_TABLEGEN_TARGET llvm-dialects-tblgen) +include(../cmake/DialectsTablegen.cmake) +set_dialects_tablegen_exe(RAYTRACING) macro(raytracing_tablegen DIALECTNAME FILE OUTPUT_FILENAME) set(LLVM_TARGET_DEFINITIONS "${FILE}") @@ -141,3 +140,4 @@ if(LLVMRAYTRACING_BUILD_TESTS) add_custom_target(check-continuations DEPENDS check-llvmraytracing) add_custom_target(check-continuations-units DEPENDS check-llvmraytracing-units) endif() + diff --git a/llvmraytracing/include/lgc/GpurtDialect.td b/llvmraytracing/include/lgc/GpurtDialect.td index b4a965d9be..01bdef3cc0 100644 --- a/llvmraytracing/include/lgc/GpurtDialect.td +++ b/llvmraytracing/include/lgc/GpurtDialect.td @@ -1,7 +1,7 @@ /* 
*********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -380,3 +380,11 @@ def GpurtGetRayQueryDispatchIdOp : GpurtOp<"get.ray.query.dispatch.id", [Memory< components. }]; } + +def GpurtMakePcOp : GpurtOp<"make.pc", [Memory<[]>, WillReturn]> { + let arguments = (ins I32:$va); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + let summary = "create a 64-bit extended virtual address out of a 32-bit input virtual address"; +} diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.h b/llvmraytracing/include/lgc/LgcCpsDialect.h index c1f5f0d2f0..79f6eeed02 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.h +++ b/llvmraytracing/include/lgc/LgcCpsDialect.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -25,6 +25,7 @@ #pragma once +#include "llpc/GpurtEnums.h" #include "llvm-dialects/Dialect/Builder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/IRBuilder.h" @@ -47,14 +48,6 @@ enum class RayTracingShaderStage; } // namespace lgc::rt namespace lgc::cps { -enum class CpsLevel : uint8_t { - RayGen = 1, - ClosestHit_Miss_Callable, - Traversal, - AnyHit_CombinedIntersection_AnyHit, - Intersection, - Count, -}; constexpr unsigned stackAddrSpace = 32; @@ -71,10 +64,13 @@ unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::Type *type); unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::ArrayRef types); std::optional getRemainingArgumentDwords(const llvm::DataLayout &DL, llvm::ArrayRef arguments); +std::optional getMaxArgumentVgprs(const llvm::Module &m); +void setMaxArgumentVgprs(llvm::Module &m, unsigned bound); + bool isCpsFunction(const llvm::Function &fn); -void setCpsFunctionLevel(llvm::Function &fn, CpsLevel level); -CpsLevel getCpsLevelFromFunction(const llvm::Function &fn); -CpsLevel getCpsLevelForShaderStage(lgc::rt::RayTracingShaderStage stage); +void setCpsFunctionLevel(llvm::Function &fn, CpsSchedulingLevel level); +CpsSchedulingLevel getCpsLevelFromFunction(const llvm::Function &fn); +CpsSchedulingLevel getCpsLevelForShaderStage(lgc::rt::RayTracingShaderStage stage); uint8_t getPotentialCpsReturnLevels(lgc::rt::RayTracingShaderStage stage); llvm::Value *lowerAsContinuationReference(llvm::IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCROp, llvm::Value *Relocation = nullptr); diff --git a/llvmraytracing/include/lgc/LgcIlCpsDialect.td b/llvmraytracing/include/lgc/LgcIlCpsDialect.td index 06a5c49072..cd3c2b410c 100644 --- a/llvmraytracing/include/lgc/LgcIlCpsDialect.td +++ b/llvmraytracing/include/lgc/LgcIlCpsDialect.td @@ -1,7 +1,7 @@ /* 
*********************************************************************************************************************** * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -70,7 +70,7 @@ def SetLocalRootIndexOp : LgcIlCpsOp<"setLocalRootIndex", [WillReturn]> { } def ContinueOp : LgcIlCpsOp<"continue", [NoReturn]> { - let arguments = (ins I64:$shaderAddr, I32:$csp, I32:$shaderIndex, I32:$returnAddr, varargs:$tail); + let arguments = (ins (or I32, I64):$shaderAddr, I32:$csp, I32:$shaderIndex, I32:$returnAddr, varargs:$tail); let results = (outs); let summary = @@ -91,7 +91,7 @@ def ContinueOp : LgcIlCpsOp<"continue", [NoReturn]> { } def WaitContinueOp : LgcIlCpsOp<"waitContinue", [NoReturn]> { - let arguments = (ins I64:$shaderAddr, I64:$waitMask, I32:$csp, I32:$shaderIndex, I32:$returnAddr, varargs:$tail); + let arguments = (ins (or I32, I64):$shaderAddr, I64:$waitMask, I32:$csp, I32:$shaderIndex, I32:$returnAddr, varargs:$tail); let results = (outs); let summary = diff --git a/llvmraytracing/include/llvmraytracing/Continuations.h b/llvmraytracing/include/llvmraytracing/Continuations.h index bdd7977877..a6f70f5f5a 100644 --- a/llvmraytracing/include/llvmraytracing/Continuations.h +++ b/llvmraytracing/include/llvmraytracing/Continuations.h @@ -135,7 +135,7 @@ Value *getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDataTy, /// Replace call to intrinsic (lgc.rt.*) with a call to the driver /// implementation (_cont_*). 
Value *replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, lgc::rt::RayTracingShaderStage Kind, - CallInst *Call, Module *GpurtLibrary, CompilerUtils::CrossModuleInliner &Inliner, + CallInst *Call, Module *GpurtLibrary, compilerutils::CrossModuleInliner &Inliner, bool KeepBuilderPos = false); /// Promote pointer arguments of a GPURT function @Func to by-value if appropriate (e. g. depending on pointeetys diff --git a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h index 1734a8e73a..314c23e9c3 100644 --- a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h @@ -216,6 +216,8 @@ class ContHelper { // The raytracing ip level that is available on the target architecture. // This is exposed to gpurt code via the GetRtip intrinsic. static constexpr const char *MDRtipName = "continuation.rtip"; + // If this metadata is set, the backend is required to handle VPC unpacking. 
+ static constexpr const char *MDDeferVPCUnpacking = "continuation.defer.vpc.unpack"; static std::optional extractZExtI32Constant(MDNode *Node) { if (Node) { @@ -372,6 +374,7 @@ class ContHelper { MODULE_METADATA_HELPER(MaxUsedPayloadRegisterCount, MDMaxUsedPayloadRegisterCountName) MODULE_METADATA_HELPER(MaxPayloadRegisterCount, MDMaxPayloadRegisterCountName) MODULE_METADATA_HELPER(Rtip, MDRtipName) + MODULE_METADATA_HELPER(DeferVpcUnpacking, MDDeferVPCUnpacking) #undef MODULE_METADATA_HELPER diff --git a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h index e9826e50b9..ec8243cacf 100644 --- a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h +++ b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -75,10 +75,10 @@ class CpsStackLowering { static unsigned getContinuationStackAlignment() { return ContinuationStackAlignment; } - CompilerUtils::TypeLowering TypeLower; + compilerutils::TypeLowering TypeLower; private: - llvm::SmallVector convertStackPtrToI32(CompilerUtils::TypeLowering &, llvm::Type *); + llvm::SmallVector convertStackPtrToI32(compilerutils::TypeLowering &, llvm::Type *); void visitCpsAlloc(lgc::cps::AllocOp &); void visitCpsFree(lgc::cps::FreeOp &); void visitCpsPeek(lgc::cps::PeekOp &); diff --git a/llvmraytracing/include/llvmraytracing/LowerRayQuery.h b/llvmraytracing/include/llvmraytracing/LowerRayQuery.h index 2b9d3ae344..92cdd400b6 100644 --- a/llvmraytracing/include/llvmraytracing/LowerRayQuery.h +++ b/llvmraytracing/include/llvmraytracing/LowerRayQuery.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -33,9 +33,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/PassManager.h" -namespace CompilerUtils { +namespace compilerutils { class TypeLowering; -} +} // namespace compilerutils namespace llvm_dialects { class Builder; @@ -169,7 +169,7 @@ class LowerRayQuery : public llvm::PassInfoMixin { llvm::SmallVector m_callsToLower; llvm::SmallSet m_funcsToLower; llvm_dialects::Builder *m_builder = nullptr; - CompilerUtils::TypeLowering *m_typeLowering = nullptr; + compilerutils::TypeLowering *m_typeLowering = nullptr; llvm::Type *m_rtqType = nullptr; unsigned m_traceRayId = 0; }; diff --git a/llvmraytracing/lib/CleanupContinuations.cpp b/llvmraytracing/lib/CleanupContinuations.cpp index 60dac80964..7ac4dce7a6 100644 --- a/llvmraytracing/lib/CleanupContinuations.cpp +++ b/llvmraytracing/lib/CleanupContinuations.cpp @@ -114,6 +114,7 @@ class CleanupContinuationsPassImpl { void lowerIntrinsicCall(Function *F, ContinuationData &Data); bool handleIntrinsics(llvm::ModuleAnalysisManager &AnalysisManager); void handleContStackIntrinsic(FunctionAnalysisManager &FAM, Function &F); + void handleGetShaderKind(Function &F); void lowerGetResumePoint(Module &Mod); bool lowerCompleteOp(Module &Mod); @@ -255,7 +256,7 @@ void CleanupContinuationsPassImpl::updateCpsStack(Function *F, Function *NewFunc Value *ContFrame = getContinuationFramePtr(F, IsStart, CpsInfo, &ToBeRemoved); if (CpsInfo.ContStateBytes != 0) { - CompilerUtils::replaceAllPointerUses(ContFrame, CpsStack, ToBeRemoved); + compilerutils::replaceAllPointerUses(ContFrame, CpsStack, ToBeRemoved); } else { // If there is no continuation state, replace it with a poison // value instead of a zero-sized stack allocation. 
@@ -478,7 +479,7 @@ void CleanupContinuationsPassImpl::processContinuations() { F->eraseMetadata(FuncData.second.MD->getMetadataID()); auto &Context = F->getContext(); auto *NewFuncTy = FunctionType::get(Type::getVoidTy(Context), AllArgTypes, false); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); + Function *NewFunc = compilerutils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); NewFunc->takeName(F); // Create helper struct for return values and RAUW on them @@ -656,7 +657,7 @@ void CleanupContinuationsPassImpl::lowerIntrinsicCall(Function *F, ContinuationD if (!Stage) return; - CompilerUtils::CrossModuleInliner CrossInliner; + compilerutils::CrossModuleInliner CrossInliner; Value *SystemDataArg = F->getArg(CpsArgIdx::SystemData); Type *SystemDataTy = SystemDataArg->getType(); @@ -787,7 +788,7 @@ void CleanupContinuationsPassImpl::handleContStackIntrinsic(FunctionAnalysisMana Replacement = Builder.CreateAlignedLoad(DestTy, Ptr, Align(CpsStackLowering::getContinuationStackAlignment())); if (FuncName.starts_with("LoadLastUse")) - CompilerUtils::setIsLastUseLoad(*cast(Replacement)); + compilerutils::setIsLastUseLoad(*cast(Replacement)); IsMemoryAccess = true; } else if (FuncName.starts_with("Store")) { @@ -816,6 +817,26 @@ void CleanupContinuationsPassImpl::handleContStackIntrinsic(FunctionAnalysisMana }); } +void CleanupContinuationsPassImpl::handleGetShaderKind(Function &F) { + assert(F.getReturnType()->isIntegerTy(32) && F.arg_size() == 0); + + llvm::forEachCall(F, [&](llvm::CallInst &CInst) { + Function *Caller = CInst.getFunction(); + auto Stage = lgc::rt::getLgcRtShaderStage(Caller); + + // Ignore GetShaderKind calls where we cannot find the shader kind. + // This happens e.g. in gpurt-implemented intrinsics that got inlined, + // but not removed. 
+ if (!Stage) + return; + + DXILShaderKind ShaderKind = ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); + auto *ShaderKindVal = ConstantInt::get(F.getReturnType(), static_cast(ShaderKind)); + CInst.replaceAllUsesWith(ShaderKindVal); + CInst.eraseFromParent(); + }); +} + void CleanupContinuationsPassImpl::lowerGetResumePoint(Module &Mod) { for (auto &F : make_early_inc_range(Mod)) { auto FuncName = F.getName(); @@ -925,7 +946,7 @@ llvm::PreservedAnalyses CleanupContinuationsPassImpl::run() { assert(StackAddrspaceMD.has_value() && "Missing continuation.stackAddrspace metadata"); auto StackAddrspace = StackAddrspaceMD.value(); - if (StackAddrspace == ContStackAddrspace::Global) + if (StackAddrspace == ContStackAddrspace::Global || StackAddrspace == ContStackAddrspace::GlobalLLPC) GetGlobalMemBase = getContinuationStackGlobalMemBase(*GpurtLibrary); StackLowering.emplace(Mod.getContext(), static_cast(StackAddrspace)); @@ -948,6 +969,13 @@ llvm::PreservedAnalyses CleanupContinuationsPassImpl::run() { Changed |= lowerCompleteOp(Mod); + for (auto &F : Mod.functions()) { + if (F.getName().starts_with("_AmdGetShaderKind")) { + handleGetShaderKind(F); + Changed |= true; + } + } + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvmraytracing/lib/ContStateBuilder.cpp b/llvmraytracing/lib/ContStateBuilder.cpp index c6a9021139..b13287dc9f 100644 --- a/llvmraytracing/lib/ContStateBuilder.cpp +++ b/llvmraytracing/lib/ContStateBuilder.cpp @@ -33,13 +33,10 @@ #include "compilerutils/CompilerUtils.h" #include "compilerutils/IRSerializationUtils.h" #include "llvmraytracing/ContinuationsUtil.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/NoFolder.h" -#include "llvm/IR/TypedPointerType.h" #include "llvm/Support/OptimizedStructLayout.h" #include "llvm/Transforms/Coroutines/SpillUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -276,62 +273,14 @@ class ContStateBuilderImpl { return false; } -std::string getLabel(Function *F) { - if (F->hasName()) - return F->getName().str(); - ModuleSlotTracker MST(F->getParent()); - MST.incorporateFunction(*F); - - return std::to_string(MST.getLocalSlot(F)); -} - -std::string getLabel(BasicBlock *BB) { - if (BB->hasName()) - return BB->getName().str(); - - Function *F = BB->getParent(); - - ModuleSlotTracker MST(F->getParent()); - MST.incorporateFunction(*F); - - return std::to_string(MST.getLocalSlot(BB)); -} - -std::string getLabel(Value *V) { - if (V->hasName()) - return V->getName().str(); - - if (!isa(V)) - return ""; - - BasicBlock *BB = dyn_cast(V)->getParent(); - Function *F = BB->getParent(); - - ModuleSlotTracker MST(F->getParent()); - MST.incorporateFunction(*F); - - return std::to_string(MST.getLocalSlot(V)); -} - -std::string getAllNames(const SmallSet &List) { - std::string S; - if (List.empty()) - return ""; - - for (BasicBlock *BB : List) - S = S + " %" + getLabel(BB); - - return S; -} - void CoroFrameRow::dump() const { if (Def) { dbgs() << "\tDef: "; LLVM_DEBUG(Def->dump()); if (isa(Def)) - dbgs() << "\tDefBB: %" << 
getLabel(cast(Def)->getParent()) << "\n"; + dbgs() << "\tDefBB: %" << compilerutils::bb::getLabel(cast(Def)->getParent()) << "\n"; else if (isa(Def)) - dbgs() << "\tDefBB: %" << getLabel(cast(Def)->getParent()) << "\n"; + dbgs() << "\tDefBB: %" << compilerutils::bb::getLabel(cast(Def)->getParent()) << "\n"; else dbgs() << "\tDefBB: Unknown Value Type\n"; } else { @@ -347,7 +296,7 @@ void CoroFrameRow::dump() const { dbgs() << "\tResidesInSuspendFrames: " << ResidesInSuspendFrame.size() << "\n"; if (!isa(Def)) { dbgs() << "\tSpilledOnDef: " << (SpilledOnDef ? "true" : "false") << "\n"; - dbgs() << "\tReloadedOnBB: " << getAllNames(ReloadedOnBB) << "\n"; + dbgs() << "\tReloadedOnBB: " << compilerutils::bb::getNamesForBasicBlocks(ReloadedOnBB) << "\n"; dbgs() << "\tSpills: " << Spills.size() << "\n"; dbgs() << "\tReloads: " << Reloads.size() << "\n"; } @@ -387,11 +336,11 @@ void CoroFrameStruct::dumpField(const OptimizedStructLayoutField &F, const CoroF const CoroFrameRow *Row = &FrameTable[Idx]; dbgs() << " Frame Table Row " << std::to_string(Idx); if (isa(Row->Def)) - dbgs() << " -- Alloca for %" << getLabel(Row->Def); + dbgs() << " -- Alloca for %" << compilerutils::bb::getLabel(Row->Def); else if (isa(Row->Def)) - dbgs() << " -- Spill of Argument %" << getLabel(Row->Def); + dbgs() << " -- Spill of Argument %" << compilerutils::bb::getLabel(Row->Def); else - dbgs() << " -- Spill of Inst %" << getLabel(Row->Def); + dbgs() << " -- Spill of Inst %" << compilerutils::bb::getLabel(Row->Def); // Determine if value is a spill or alloca if (auto *DefAlloca = dyn_cast(Row->Def)) { @@ -425,9 +374,9 @@ void CoroFrameStruct::dump(const CoroFrameTableTy &FrameTable) const { } dbgs() << "\tFrameStruct Size: " << Size << " bytes, "; dbgs() << "Align: " << Alignment.value() << " bytes\n"; - std::string SuspendBBName = SuspendBB ? getLabel(SuspendBB) : "nullptr"; + std::string SuspendBBName = SuspendBB ? 
compilerutils::bb::getLabel(SuspendBB) : "nullptr"; dbgs() << "\tSuspendBB: %" << SuspendBBName << "\n"; - std::string ResumeBBName = ResumeBB ? getLabel(ResumeBB) : "nullptr"; + std::string ResumeBBName = ResumeBB ? compilerutils::bb::getLabel(ResumeBB) : "nullptr"; dbgs() << "\tResumeBB: %" << ResumeBBName << "\n"; } @@ -953,7 +902,7 @@ void ContStateBuilderImpl::createFrameGEPs(SmallVector &DeadIn // the alloca. The GEP is put into the SpillBlock. The SpillBlock is // the entry point of each continuation, so any instrs put there will // be available to all continuations after the main function is split. - CompilerUtils::replaceAllPointerUses(Alloca, GepInst, DeadInstructions); + compilerutils::replaceAllPointerUses(Alloca, GepInst, DeadInstructions); // Alloca is dead, we may visit this Row more than once, so we need to // check if the value is in the DeadInstructions list already. @@ -1262,7 +1211,7 @@ void ContStateBuilderImpl::buildCoroutineFrame() { auto &Struct = R.value().second; LLVM_DEBUG(dbgs() << "Suspend " << R.index() << "\n"); LLVM_DEBUG(dbgs() << "\tSuspendInst: "; Suspend->dump()); - LLVM_DEBUG(dbgs() << "\tSuspendBB: %" << getLabel(Suspend->getParent()) << "\n"); + LLVM_DEBUG(dbgs() << "\tSuspendBB: %" << compilerutils::bb::getLabel(Suspend->getParent()) << "\n"); // Sink spill uses. This will move all uses of allocas to after the // CoroBegin ensuring that all access to the alloca ptr occur after diff --git a/llvmraytracing/lib/Continuations.cpp b/llvmraytracing/lib/Continuations.cpp index d4194e212f..4b4440f150 100644 --- a/llvmraytracing/lib/Continuations.cpp +++ b/llvmraytracing/lib/Continuations.cpp @@ -532,7 +532,7 @@ void ContHelper::addDxilContinuationPasses(ModulePassManager &MPM, Module *Gpurt // Fixup DXIL vs LLVM incompatibilities. This needs to run first. // If we add more LLVM processing separate from continuation passes, // we potentially should do it earlier as part of the module loading. 
- MPM.addPass(CompilerUtils::DxilToLlvmPass()); + MPM.addPass(compilerutils::DxilToLlvmPass()); // Translate dx.op intrinsic calls to lgc.rt dialect intrinsic calls MPM.addPass(DXILContLgcRtOpConverterPass()); @@ -551,7 +551,7 @@ void ContHelper::addDxilContinuationPasses(ModulePassManager &MPM, Module *Gpurt } void ContHelper::addDxilGpurtLibraryPasses(ModulePassManager &MPM) { - MPM.addPass(CompilerUtils::DxilToLlvmPass()); + MPM.addPass(compilerutils::DxilToLlvmPass()); MPM.addPass(llvm::DXILContPrepareGpurtLibraryPass()); MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); @@ -734,7 +734,7 @@ Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDa Value *llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, - CompilerUtils::CrossModuleInliner &Inliner, bool KeepBuilderPos) { + compilerutils::CrossModuleInliner &Inliner, bool KeepBuilderPos) { if (!KeepBuilderPos) B.SetInsertPoint(Call); @@ -901,7 +901,7 @@ static bool replaceEnqueueIntrinsic(Function &F) { if (NewCall->getFunction()->getName() == ContDriverFunc::ExitRayGenName) ContHelper::OutgoingRegisterCount::setValue(NewCall, 0); - CompilerUtils::createUnreachable(B); + compilerutils::createUnreachable(B); Changed = true; }); @@ -929,7 +929,7 @@ static bool replaceAwaitIntrinsic(Function &F, bool PreserveWaitMasks = true) { NewArgs.erase(NewArgs.begin() + 1); B.SetInsertPoint(&CInst); - auto *NewCall = CompilerUtils::createNamedCall(B, "_AmdAwait", CInst.getType(), NewArgs, {}); + auto *NewCall = compilerutils::createNamedCall(B, "_AmdAwait", CInst.getType(), NewArgs, {}); CInst.replaceAllUsesWith(NewCall); if (PreserveWaitMasks) ContHelper::setWaitMask(*NewCall); @@ -1122,7 +1122,7 @@ Function *llvm::tryGpurtPointerArgPromotion(Function *Func) { } // promotePointerArguments returns the input if no argument was promoted. 
- auto *NewFunc = CompilerUtils::promotePointerArguments(Func, PromotionMask); + auto *NewFunc = compilerutils::promotePointerArguments(Func, PromotionMask); // This function is provided by the compiler to GPURT. It will be substituted by LowerRaytracingPipeline. // NOTE: GPURT now preserves all function names started with "_Amd", but some of them are not intrinsics, e.g., @@ -1288,9 +1288,7 @@ void addLgcContinuationTransform(ModulePassManager &MPM) { // Scalarizer pass could break down system data structure (and possibly other data) which would help to reduce size of // continuations state. - ScalarizerPassOptions scalarizerOptions; - scalarizerOptions.ScalarizeMinBits = 32; - MPM.addPass(createModuleToFunctionPassAdaptor(ScalarizerPass(scalarizerOptions))); + MPM.addPass(createModuleToFunctionPassAdaptor(ScalarizerPass())); MPM.addPass(CoroEarlyPass()); CGSCCPassManager CGPM; diff --git a/llvmraytracing/lib/CpsStackLowering.cpp b/llvmraytracing/lib/CpsStackLowering.cpp index caec0d870d..7aa8d90b50 100644 --- a/llvmraytracing/lib/CpsStackLowering.cpp +++ b/llvmraytracing/lib/CpsStackLowering.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -35,7 +35,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" -using namespace CompilerUtils; +using namespace compilerutils; using namespace llvm; using namespace lgc::cps; @@ -396,7 +396,7 @@ Value *CpsStackLowering::getRealMemoryAddress(Value *Offset) { // `nullptr` if there is no base address and the csp // can be converted with ptrtoint. 
Function *CpsStackLowering::addOrInitCsp(Function *F, Function *GetGlobalMemBase, bool RequiresIncomingCsp) { - CompilerUtils::CrossModuleInliner CrossInliner; + compilerutils::CrossModuleInliner CrossInliner; auto &GpurtContext = lgc::GpurtContext::get(Mod->getContext()); auto &GpurtLibrary = GpurtContext.theModule ? *GpurtContext.theModule : *Mod; Value *Initializer = nullptr; @@ -411,7 +411,7 @@ Function *CpsStackLowering::addOrInitCsp(Function *F, Function *GetGlobalMemBase NewArgTys.insert(NewArgTys.begin(), Builder.getInt32Ty()); - Function *NewFunc = CompilerUtils::mutateFunctionArguments(*F, F->getReturnType(), NewArgTys, F->getAttributes()); + Function *NewFunc = compilerutils::mutateFunctionArguments(*F, F->getReturnType(), NewArgTys, F->getAttributes()); Argument *CspArg = NewFunc->getArg(0); CspArg->setName("cspInit"); diff --git a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp index 5b3d5ad9c9..b82cbcc846 100644 --- a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp +++ b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -29,6 +29,7 @@ // //===----------------------------------------------------------------------===// +#include "DXILEnums.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" #include "lgc/LgcIlCpsDialect.h" @@ -50,40 +51,7 @@ namespace { using namespace llvm; - -/// An enum to simplify fetching the attributes from reportHit operations. 
-enum class ReportHitAttributeIndex { THit = 1, HitKind, Attributes, Count = Attributes }; - -/// An enum to simplify fetching the attributes from callShader operations. -enum class CallShaderAttributeIndex { ShaderIndex = 1, Param = 2, Count = Param }; - -/// An enum to simplify fetching the attributes from traceRay operations. -enum class TraceRayAttributeIndex { - AccelStruct = 1, - RayFlags, - InstanceInclusionMask, - RayContributionToHitGroupIndex, - MultiplierForGeometryContribution, - MissShaderIndex, - OriginX, - OriginY, - OriginZ, - TMin, - DirX, - DirY, - DirZ, - TMax, - Payload, - Count = Payload -}; - -template llvm::Value *getEnumArgOperand(llvm::CallInst &CI, T Index) { - static_assert(std::is_enum() && "T must be an enum!"); - - llvm::Value *Arg = CI.getArgOperand(static_cast(Index)); - assert(Arg && "Requested argument should not be nullptr!"); - return Arg; -} +using namespace llvmraytracing; static void analyzeShaderKinds(Module &M, MapVector &ShaderKinds) { auto *EntryPoints = M.getNamedMetadata("dx.entryPoints"); @@ -167,33 +135,30 @@ template Value *DXILContLgcRtOpConverterPass::handleSimpleCall(Cal /// Create a lgc.rt.trace.ray op from a dx.op.traceRay call. 
Value *DXILContLgcRtOpConverterPass::handleTraceRayOp(CallInst &CI) { - assert(CI.arg_size() >= static_cast(TraceRayAttributeIndex::Count) && "Invalid argument size!"); + assert(CI.arg_size() == static_cast(TraceRayArgIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); - Value *AccelStructHandle = getEnumArgOperand(CI, TraceRayAttributeIndex::AccelStruct); - Value *RayFlags = getEnumArgOperand(CI, TraceRayAttributeIndex::RayFlags); - Value *InstanceInclusionMask = getEnumArgOperand(CI, TraceRayAttributeIndex::InstanceInclusionMask); - Value *RayContributionToHitGroupIndex = getEnumArgOperand(CI, TraceRayAttributeIndex::RayContributionToHitGroupIndex); - Value *MultiplierForGeometryContribution = - getEnumArgOperand(CI, TraceRayAttributeIndex::MultiplierForGeometryContribution); - Value *MissShaderIndex = getEnumArgOperand(CI, TraceRayAttributeIndex::MissShaderIndex); - Value *Origin = createVec3(getEnumArgOperand(CI, TraceRayAttributeIndex::OriginX), - getEnumArgOperand(CI, TraceRayAttributeIndex::OriginY), - getEnumArgOperand(CI, TraceRayAttributeIndex::OriginZ)); - Value *TMin = getEnumArgOperand(CI, TraceRayAttributeIndex::TMin); - Value *Dir = createVec3(getEnumArgOperand(CI, TraceRayAttributeIndex::DirX), - getEnumArgOperand(CI, TraceRayAttributeIndex::DirY), - getEnumArgOperand(CI, TraceRayAttributeIndex::DirZ)); - Value *TMax = getEnumArgOperand(CI, TraceRayAttributeIndex::TMax); - Value *Payload = getEnumArgOperand(CI, TraceRayAttributeIndex::Payload); + Value *AccelStructHandle = CI.getArgOperand(TraceRayArgIndex::AccelStruct); + Value *RayFlags = CI.getArgOperand(TraceRayArgIndex::RayFlags); + Value *InstanceInclusionMask = CI.getArgOperand(TraceRayArgIndex::InstanceInclusionMask); + Value *RayContributionToHitGroupIndex = CI.getArgOperand(TraceRayArgIndex::RayContributionToHitGroupIndex); + Value *MultiplierForGeometryContribution = CI.getArgOperand(TraceRayArgIndex::MultiplierForGeometryContribution); + Value *MissShaderIndex = 
CI.getArgOperand(TraceRayArgIndex::MissShaderIndex); + Value *Origin = createVec3(CI.getArgOperand(TraceRayArgIndex::OriginX), CI.getArgOperand(TraceRayArgIndex::OriginY), + CI.getArgOperand(TraceRayArgIndex::OriginZ)); + Value *TMin = CI.getArgOperand(TraceRayArgIndex::TMin); + Value *Dir = createVec3(CI.getArgOperand(TraceRayArgIndex::DirX), CI.getArgOperand(TraceRayArgIndex::DirY), + CI.getArgOperand(TraceRayArgIndex::DirZ)); + Value *TMax = CI.getArgOperand(TraceRayArgIndex::TMax); + Value *Payload = CI.getArgOperand(TraceRayArgIndex::Payload); Function *AccelStructGetter = getAccelStructAddr(*CI.getModule(), AccelStructHandle->getType()); Value *AccelStructAddr = Builder->CreateCall(AccelStructGetter, AccelStructHandle); // TODO: This only creates a Paq array with the size of the payload data for // now. - Type *PaqTy = getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(TraceRayAttributeIndex::Payload)); + Type *PaqTy = getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(TraceRayArgIndex::Payload)); SmallVector PaqArgs; if (PaqTy) PaqArgs.push_back(ConstantInt::get(Builder->getInt32Ty(), DL->getTypeAllocSize(PaqTy).getKnownMinValue())); @@ -211,14 +176,14 @@ Value *DXILContLgcRtOpConverterPass::handleTraceRayOp(CallInst &CI) { /// Create a lgc.rt.report.hit op from a dx.op.reportHit call. 
Value *DXILContLgcRtOpConverterPass::handleReportHitOp(CallInst &CI) { - assert(CI.arg_size() >= static_cast(ReportHitAttributeIndex::Count) && "Invalid argument size!"); + assert(CI.arg_size() == static_cast(ReportHitArgIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); - Value *THit = getEnumArgOperand(CI, ReportHitAttributeIndex::THit); - Value *HitKind = getEnumArgOperand(CI, ReportHitAttributeIndex::HitKind); - Value *Attributes = getEnumArgOperand(CI, ReportHitAttributeIndex::Attributes); + Value *THit = CI.getArgOperand(ReportHitArgIndex::THit); + Value *HitKind = CI.getArgOperand(ReportHitArgIndex::HitKind); + Value *Attributes = CI.getArgOperand(ReportHitArgIndex::Attributes); auto AttributeSizeBytes = DL->getTypeAllocSize( - getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(ReportHitAttributeIndex::Attributes))); + getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(ReportHitArgIndex::Attributes))); auto *Op = Builder->create(THit, HitKind, Attributes, AttributeSizeBytes); @@ -229,14 +194,14 @@ Value *DXILContLgcRtOpConverterPass::handleReportHitOp(CallInst &CI) { /// Create a lgc.rt.call.callable.shader op from a dx.op.callShader call. 
Value *DXILContLgcRtOpConverterPass::handleCallShaderOp(CallInst &CI) { - assert(CI.arg_size() >= static_cast(CallShaderAttributeIndex::Count) && "Invalid argument size!"); + assert(CI.arg_size() == static_cast(CallShaderArgIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); - Value *ShaderIndex = getEnumArgOperand(CI, CallShaderAttributeIndex::ShaderIndex); - Value *Param = getEnumArgOperand(CI, CallShaderAttributeIndex::Param); + Value *ShaderIndex = CI.getArgOperand(CallShaderArgIndex::ShaderIndex); + Value *Param = CI.getArgOperand(CallShaderArgIndex::Param); auto ParamSizeBytes = DL->getTypeAllocSize( - getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(CallShaderAttributeIndex::Param))); + getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(CallShaderArgIndex::Param))); auto *Op = Builder->create(ShaderIndex, Param, ParamSizeBytes.getKnownMinValue()); diff --git a/llvmraytracing/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp index 5409bbe07a..1c085c8d90 100644 --- a/llvmraytracing/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -107,7 +107,10 @@ void DXILContPostProcessPassImpl::lowerJumpOp(lgc::cps::JumpOp &JumpOp) { SmallVector TailArgs{JumpOp.getTail()}; - Value *JumpTarget = ensure64BitAddr(JumpOp.getTarget()); + Value *JumpTarget = JumpOp.getTarget(); + if (!ContHelper::tryGetDeferVpcUnpacking(*Mod)) + JumpTarget = ensure64BitAddr(JumpTarget); + Value *ShaderIndex = JumpOp.getShaderIndex(); Value *RetAddr = JumpOp.getRcr(); if (ContHelper::isWaitAwaitCall(JumpOp)) { diff --git a/llvmraytracing/lib/DXILContPrepareGpurtLibrary.cpp b/llvmraytracing/lib/DXILContPrepareGpurtLibrary.cpp index c0d43557c7..6c3110fbee 100644 --- a/llvmraytracing/lib/DXILContPrepareGpurtLibrary.cpp +++ b/llvmraytracing/lib/DXILContPrepareGpurtLibrary.cpp @@ -63,7 +63,7 @@ static Function *transformFunction(Function &F) { auto Name = F.getName(); LLVM_DEBUG(dbgs() << "Transforming function " << Name << "\n"); // Copy name, otherwise it will be deleted before it is set - std::string NewName = CompilerUtils::dxil::tryDemangleFunctionName(Name.str()).str(); + std::string NewName = compilerutils::dxil::tryDemangleFunctionName(Name.str()).str(); LLVM_DEBUG(dbgs() << " Set new name " << NewName << "\n"); F.setName(NewName); @@ -79,13 +79,13 @@ static Function *transformFunction(Function &F) { Function *NewFn = &F; if (NewRetTy->isStructTy() && NewRetTy->getStructNumElements() == 1) { if (F.getName().contains("ObjectToWorld4x3") || F.getName().contains("WorldToObject4x3")) { - NewFn = CompilerUtils::unpackStructReturnType(NewFn); + NewFn = compilerutils::unpackStructReturnType(NewFn); } } // Lower `StructRet` argument. 
if (NewFn->hasStructRetAttr()) - NewFn = CompilerUtils::lowerStructRetArgument(NewFn); + NewFn = compilerutils::lowerStructRetArgument(NewFn); NewFn->addFnAttr(Attribute::AlwaysInline); // Set external linkage, so the functions don't get removed, even if they are diff --git a/llvmraytracing/lib/DXILEnums.h b/llvmraytracing/lib/DXILEnums.h new file mode 100644 index 0000000000..666aabe47d --- /dev/null +++ b/llvmraytracing/lib/DXILEnums.h @@ -0,0 +1,164 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +//===- DXILEnums.h ---------------------------------------------------------===// +// +// This file contains enums and related functions for processing DXIL. +// +//===----------------------------------------------------------------------===// + +#pragma once +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include + +namespace llvmraytracing { + +// DXIL defines a large number of DxOpCodes. We only include the ones we need here. +namespace DxOpCode { +enum Enum : unsigned { + IsNaN = 8, // Returns true if x is NAN or QNAN, false otherwise. + + // This is not a typical return code for DirectXShaderCompiler, but we use it. + Invalid = 0xFFFFFFFF, +}; +} // namespace DxOpCode + +namespace IsSpecialFloatF32ArgIndex { +enum Enum : unsigned { Opcode = 0, Value, Count }; +} + +namespace CreateHandleArgIndex { +enum Enum { + OpCode, + ResourceClass, + RangeId, + Index, + NonUniformIndex, +}; +} + +namespace CreateHandleFromBindingArgIndex { +enum Enum { + OpCode, + Bind, + Index, + NonUniformIndex, +}; +} + +namespace CreateHandleForLibArgIndex { +enum Enum { OpCode, Resource }; +} + +namespace AnnotateHandleArgIndex { +enum OperandIdx { OpCode, Bind, Index, NonUniformIndex }; +} + +namespace ReportHitArgIndex { +enum Enum : unsigned { Opcode = 0, THit, HitKind, Attributes, Count }; +} + +namespace CallShaderArgIndex { +enum Enum : unsigned { Opcode = 0, ShaderIndex, Param, Count }; +} + +namespace AtomicCompareExchangeArgIndex { +enum Enum : unsigned { Opcode = 0, Handle, Offset0, Offset1, Offset2, CompareValue, NewValue, Count }; +} + +namespace TraceRayArgIndex { +enum Enum : unsigned { + Opcode = 0, + AccelStruct, + RayFlags, + InstanceInclusionMask, + RayContributionToHitGroupIndex, + MultiplierForGeometryContribution, + MissShaderIndex, + OriginX, + OriginY, + OriginZ, + TMin, + DirX, + DirY, + DirZ, + TMax, + Payload,
+ Count +}; +} + +namespace RayQueryTraceRayInlineArgIndex { +enum Enum : unsigned { + Opcode = 0, + RayQueryHandle, + AccelStruct, + RayFlags, + InstanceInclusionMask, + OriginX, + OriginY, + OriginZ, + TMin, + DirX, + DirY, + DirZ, + TMax, + Count +}; +} + +// Indices into the dx.resources MDTuple. +namespace DxResourceMDIndex { +enum Enum : unsigned { SRVs = 0, UAVs, CBuffers, Samplers, Count }; +} + +// Indices of a resource MDTuple. +namespace ResourceMDIndex { +enum Enum : unsigned { + ID = 0, // Unique (per type) resource ID. + Variable, // Resource global variable. + Name, // Original (HLSL) name of the resource. + SpaceID, // Resource range space ID. + LowerBound, // Resource range lower bound. + RangeSize, // Resource range size. + Count +}; +} + +namespace DxResourceClass { +enum Enum { SRV, UAV, CBuffer, Sampler, Invalid }; +} + +namespace DxResBindIndex { +enum Enum { + RangeLowerBound, + RangeUpperBound, + SpaceID, + ResourceClass, +}; +} + +} // namespace llvmraytracing diff --git a/llvmraytracing/lib/LgcCpsDialect.cpp b/llvmraytracing/lib/LgcCpsDialect.cpp index d125fc370c..279c8b1c9c 100644 --- a/llvmraytracing/lib/LgcCpsDialect.cpp +++ b/llvmraytracing/lib/LgcCpsDialect.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -43,6 +43,7 @@ using namespace llvm; using namespace lgc::rt; constexpr const char CpsMetadata[] = "lgc.cps"; +constexpr const char CpsMaxArgumentVgprsMetadata[] = "lgc.cps.maxArgumentVgprs"; // ===================================================================================================================== // Helper to determine how many dwords we require to store a variable of a given @@ -113,6 +114,27 @@ std::optional lgc::cps::getRemainingArgumentDwords(const DataLayout &D return MaxArgumentDwords - currentDwordUsage; } +// ===================================================================================================================== +// Get the maximum number of VGPR registers that can be used as arguments by any +// shader in the pipeline. This includes payload registers and their +// corresponding padding. +std::optional lgc::cps::getMaxArgumentVgprs(const Module &m) { + NamedMDNode *node = m.getNamedMetadata(CpsMaxArgumentVgprsMetadata); + if (!node) + return std::nullopt; + + return mdconst::extract(node->getOperand(0)->getOperand(0))->getZExtValue(); +} + +// Set the maximum number of VGPR registers that can be used as arguments by any +// shader in the pipeline. +void lgc::cps::setMaxArgumentVgprs(Module &module, unsigned maxArgumentVgprs) { + LLVMContext &context = module.getContext(); + MDNode *node = + MDNode::get(context, {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(context), maxArgumentVgprs))}); + module.getOrInsertNamedMetadata(CpsMaxArgumentVgprsMetadata)->addOperand(node); +} + // ===================================================================================================================== // Checks if a function is annotated with !lgc.cps metadata. 
bool lgc::cps::isCpsFunction(const Function &fn) { @@ -123,8 +145,8 @@ bool lgc::cps::isCpsFunction(const Function &fn) { // ===================================================================================================================== // Transforms a function into a CPS function by setting the CPS level as // metadata. -void lgc::cps::setCpsFunctionLevel(Function &fn, CpsLevel level) { - assert(level < CpsLevel::Count && "Invalid CPS level!"); +void lgc::cps::setCpsFunctionLevel(Function &fn, CpsSchedulingLevel level) { + assert(level < CpsSchedulingLevel::Count && "Invalid CPS level!"); LLVMContext &context = fn.getContext(); MDNode *node = MDNode::get( @@ -136,7 +158,7 @@ void lgc::cps::setCpsFunctionLevel(Function &fn, CpsLevel level) { // Returns the CPS level of a function, if the function is a CPS function and // has the level metadata node set. For now, this always expects a function to // have both the CPS metadata and the level metadata. -lgc::cps::CpsLevel lgc::cps::getCpsLevelFromFunction(const Function &fn) { +CpsSchedulingLevel lgc::cps::getCpsLevelFromFunction(const Function &fn) { MDNode *node = fn.getMetadata(fn.getContext().getMDKindID(CpsMetadata)); if (!node) { // Expect that we have set the CPS metadata. @@ -145,28 +167,28 @@ lgc::cps::CpsLevel lgc::cps::getCpsLevelFromFunction(const Function &fn) { const ConstantAsMetadata *c = cast(node->getOperand(0)); unsigned level = cast(c->getValue())->getZExtValue(); - assert(level < static_cast(CpsLevel::Count) && "Invalid CPS level!"); - return static_cast(level); + assert(level < static_cast(CpsSchedulingLevel::Count) && "Invalid CPS level!"); + return static_cast(level); } // ===================================================================================================================== // Transform a shader type into the corresponding CPS level. 
-lgc::cps::CpsLevel lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { +CpsSchedulingLevel lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { if (stage == RayTracingShaderStage::RayGeneration) - return CpsLevel::RayGen; + return CpsSchedulingLevel::RayGen; if (stage == RayTracingShaderStage::Traversal) - return CpsLevel::Traversal; + return CpsSchedulingLevel::Traversal; if (stage == RayTracingShaderStage::ClosestHit || stage == RayTracingShaderStage::Miss || stage == RayTracingShaderStage::Callable) - return CpsLevel::ClosestHit_Miss_Callable; + return CpsSchedulingLevel::ClosestHit_Miss_Callable; if (stage == RayTracingShaderStage::AnyHit) - return CpsLevel::AnyHit_CombinedIntersection_AnyHit; + return CpsSchedulingLevel::AnyHit_CombinedIntersection_AnyHit; if (stage == RayTracingShaderStage::Intersection) - return CpsLevel::Intersection; + return CpsSchedulingLevel::Intersection; llvm_unreachable("Cannot determine CPS level."); } @@ -177,7 +199,7 @@ lgc::cps::CpsLevel lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage sta uint8_t lgc::cps::getPotentialCpsReturnLevels(RayTracingShaderStage stage) { std::bitset<8> CpsLevels; - auto SetLevel = [&CpsLevels](CpsLevel Level) -> void { CpsLevels.set(static_cast(Level)); }; + auto SetLevel = [&CpsLevels](CpsSchedulingLevel Level) -> void { CpsLevels.set(static_cast(Level)); }; switch (stage) { case RayTracingShaderStage::RayGeneration: @@ -190,18 +212,18 @@ uint8_t lgc::cps::getPotentialCpsReturnLevels(RayTracingShaderStage stage) { case RayTracingShaderStage::Miss: case RayTracingShaderStage::Traversal: // These stages returns to wherever TraceRay is called (RGS, CHS and miss). - SetLevel(CpsLevel::RayGen); - SetLevel(CpsLevel::ClosestHit_Miss_Callable); + SetLevel(CpsSchedulingLevel::RayGen); + SetLevel(CpsSchedulingLevel::ClosestHit_Miss_Callable); break; case RayTracingShaderStage::AnyHit: // AHS returns to Traversal (triangle intersection) or IS (procedural // intersection). 
- SetLevel(CpsLevel::Traversal); - SetLevel(CpsLevel::Intersection); + SetLevel(CpsSchedulingLevel::Traversal); + SetLevel(CpsSchedulingLevel::Intersection); break; case RayTracingShaderStage::Intersection: // IS returns to Traversal only. - SetLevel(CpsLevel::Traversal); + SetLevel(CpsSchedulingLevel::Traversal); break; default: llvm_unreachable("Cannot determine CPS level."); diff --git a/llvmraytracing/lib/LgcCpsJumpInliner.cpp b/llvmraytracing/lib/LgcCpsJumpInliner.cpp index c0afacbbf2..044c48bc67 100644 --- a/llvmraytracing/lib/LgcCpsJumpInliner.cpp +++ b/llvmraytracing/lib/LgcCpsJumpInliner.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -58,7 +58,7 @@ class LgcCpsJumpInlinerPassImpl final { LLVMContext *Context; const DataLayout *DL; llvm_dialects::Builder Builder; - CompilerUtils::CrossModuleInliner CrossInliner; + compilerutils::CrossModuleInliner CrossInliner; }; } // namespace diff --git a/llvmraytracing/lib/LowerAwait.cpp b/llvmraytracing/lib/LowerAwait.cpp index 3c6849a08b..ad0341f5e3 100644 --- a/llvmraytracing/lib/LowerAwait.cpp +++ b/llvmraytracing/lib/LowerAwait.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -103,7 +103,7 @@ void LowerAwaitPassImpl::processContinuations() { // Create new empty function auto *NewFuncTy = FunctionType::get(NewRetTy, AllArgTypes, false); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); + Function *NewFunc = compilerutils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); NewFunc->takeName(F); // Transfer code from old function to new function diff --git a/llvmraytracing/lib/LowerRayQuery.cpp b/llvmraytracing/lib/LowerRayQuery.cpp index f94e3cc0df..db6cb84a5b 100644 --- a/llvmraytracing/lib/LowerRayQuery.cpp +++ b/llvmraytracing/lib/LowerRayQuery.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -43,7 +43,7 @@ using namespace lgc; using namespace lgc::rt; using namespace llvm; using namespace llvm_dialects; -using namespace CompilerUtils; +using namespace compilerutils; // Table of GPURT function names. Must match the order of enum GpurtFunc. 
static const char *const GpurtFuncNames[] = { diff --git a/llvmraytracing/lib/LowerRaytracingPipeline.cpp b/llvmraytracing/lib/LowerRaytracingPipeline.cpp index 806ffa495d..cf717fac22 100644 --- a/llvmraytracing/lib/LowerRaytracingPipeline.cpp +++ b/llvmraytracing/lib/LowerRaytracingPipeline.cpp @@ -163,13 +163,13 @@ struct PayloadCopyHelper { PAQIndexInterval Interval = {CompleteInterval.Begin, std::min(CompleteInterval.End, PayloadRegisterCount)}; // Pointer to start of current interval in global payload auto *GlobalIntervalI32Ptr = - CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Serialization, Interval.Begin); + compilerutils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Serialization, Interval.Begin); TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); } if (CompleteInterval.End > PayloadRegisterCount) { PAQIndexInterval Interval = {std::max(CompleteInterval.Begin, PayloadRegisterCount), CompleteInterval.End}; // Pointer to start of current interval in global payload - auto *GlobalIntervalI32Ptr = CompilerUtils::simplifyingCreateConstInBoundsGEP1_32( + auto *GlobalIntervalI32Ptr = compilerutils::simplifyingCreateConstInBoundsGEP1_32( B, I32, SpilledPayloadPtr, Interval.Begin - PayloadRegisterCount); TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); } @@ -182,7 +182,7 @@ struct PayloadCopyHelper { assert(*FieldByteOffset == FieldI32Offset * RegisterBytes); // I32 pointer into field, offset by FieldI32Offset auto *FieldIntervalI32Ptr = - CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(B, I32, LocalFieldPtr, FieldI32Offset); + compilerutils::simplifyingCreateConstInBoundsGEP1_32(B, I32, LocalFieldPtr, FieldI32Offset); // Determine Src and Dst auto *Src = FieldIntervalI32Ptr; @@ -232,6 +232,11 @@ class ModuleMetadataState final { void updateModuleMetadata() const; + void updateMaxArgumentVgprs(uint32_t Count) { + assert(!MaxArgumentVgprs.has_value() && "Shouldn't have to set this twice"); + MaxArgumentVgprs = Count; + } + private: 
Module &Mod; /// MaxPayloadRegisterCount is initialized from metadata. If there is none, @@ -259,6 +264,8 @@ class ModuleMetadataState final { // This mechanism ensures we don't rely on it in case the value was only initialized // during processing of the current module. bool MaxUsedPayloadRegisterCountWasSet = false; + + std::optional MaxArgumentVgprs = std::nullopt; }; class LowerRaytracingPipelinePassImpl final { @@ -405,11 +412,7 @@ class LowerRaytracingPipelinePassImpl final { /// Compute the dword at which payload starts in the argument at most in the /// argument list. - std::optional getPayloadStartDword(FunctionData &Data, uint32_t MaxHitAttributeBytes, - Type *TraversalDataTy) { - if (Data.PayloadStorageTy->getArrayNumElements() == 0) - return std::nullopt; - + uint32_t getPayloadStartDword(uint32_t MaxHitAttributeBytes, Type *TraversalDataTy) { assert(TraversalDataTy && "Failed to detect traversal system data type"); // Always ensure that we consider the two dword barycentric coordinates @@ -499,7 +502,6 @@ class LowerRaytracingPipelinePassImpl final { void replaceShaderIndexCall(FunctionData &Data, CallInst *Call); void replaceShaderRecordBufferCall(FunctionData &Data, CallInst *Call); - void handleGetShaderKind(Function &Func); void handleGetCurrentFuncAddr(Function &Func); void handleAmdInternalFunc(Function &Func); @@ -553,6 +555,8 @@ class LowerRaytracingPipelinePassImpl final { // continue calls) in GpuRt entries (Traversal and launch kernel). 
void setGpurtEntryRegisterCountMetadata(); + void setMaxArgumentVgprsMetadata(); + void copyHitAttributes(FunctionData &Data, Value *SystemData, Type *SystemDataTy, Value *LocalHitAttributes, bool GlobalToLocal, const PAQSerializationLayout *Layout); void processContinuations(); @@ -578,7 +582,7 @@ class LowerRaytracingPipelinePassImpl final { ModuleMetadataState MetadataState; PAQSerializationInfoManager PAQManager; PayloadHelper PayloadHelper; - CompilerUtils::CrossModuleInliner CrossInliner; + compilerutils::CrossModuleInliner CrossInliner; Type *I32; /// System data type passed to Traversal Type *TraversalDataTy; @@ -589,6 +593,8 @@ class LowerRaytracingPipelinePassImpl final { /// Vgpr Argument struct type passed to shaders StructType *VgprArgumentStructTy; + std::optional PayloadStartDword; + // Function definitions and declarations from HLSL // Driver implementation that returns if AcceptHitAndEndSearch was called Function *IsEndSearch; @@ -642,6 +648,8 @@ void ModuleMetadataState::updateModuleMetadata() const { ContHelper::MaxPayloadRegisterCount::setValue(&Mod, MaxPayloadRegisterCount); ContHelper::MaxUsedPayloadRegisterCount::setValue(&Mod, MaxUsedPayloadRegisterCount); ContHelper::setStackAddrspace(Mod, StackAddrspace); + if (MaxArgumentVgprs.has_value()) + lgc::cps::setMaxArgumentVgprs(Mod, *MaxArgumentVgprs); } // Create a lgc.cps.await operation for a given shader address. @@ -1057,26 +1065,6 @@ void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall(FunctionData Call->eraseFromParent(); } -void LowerRaytracingPipelinePassImpl::handleGetShaderKind(Function &Func) { - assert(Func.getReturnType()->isIntegerTy(32) && Func.arg_size() == 0); - - llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { - Function *F = CInst.getFunction(); - auto Stage = getLgcRtShaderStage(F); - - // Ignore GetShaderKind calls where we cannot find the shader kind. - // This happens e.g. in gpurt-implemented intrinsics that got inlined, - // but not removed. 
- if (!Stage) - return; - - DXILShaderKind ShaderKind = ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); - auto *ShaderKindVal = ConstantInt::get(Func.getReturnType(), static_cast(ShaderKind)); - CInst.replaceAllUsesWith(ShaderKindVal); - CInst.eraseFromParent(); - }); -} - void LowerRaytracingPipelinePassImpl::handleGetCurrentFuncAddr(Function &Func) { assert(Func.empty() && // Returns an i32 or i64 @@ -1098,8 +1086,8 @@ void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, uint64_t NumBytes) uint64_t NumFullI32s = NumBytes / RegisterBytes; // Copy full I32s for (uint64_t I32Index = 0; I32Index < NumFullI32s; ++I32Index) { - auto *DstPtr = CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Dst, I32Index); - auto *SrcPtr = CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Src, I32Index); + auto *DstPtr = compilerutils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Dst, I32Index); + auto *SrcPtr = compilerutils::simplifyingCreateConstInBoundsGEP1_32(B, I32, Src, I32Index); auto *Val = B.CreateLoad(I32, SrcPtr); B.CreateStore(Val, DstPtr); } @@ -1112,8 +1100,8 @@ void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, uint64_t NumBytes) // Create i8 loads and stores for the remaining bytes Type *I8 = B.getIntNTy(8); for (uint64_t I8Index = NumFullI32s * RegisterBytes; I8Index < NumBytes; ++I8Index) { - auto *DstPtr = CompilerUtils::simplifyingCreateConstGEP1_32(B, I8, Dst, I8Index); - auto *SrcPtr = CompilerUtils::simplifyingCreateConstGEP1_32(B, I8, Src, I8Index); + auto *DstPtr = compilerutils::simplifyingCreateConstGEP1_32(B, I8, Dst, I8Index); + auto *SrcPtr = compilerutils::simplifyingCreateConstGEP1_32(B, I8, Src, I8Index); auto *Val = B.CreateLoad(I8, SrcPtr); B.CreateStore(Val, DstPtr); } @@ -1133,7 +1121,7 @@ void LowerRaytracingPipelinePassImpl::copyPayload(Type &PayloadTy, Value *LocalP Value *SpilledPayloadPtr = nullptr; if (Layout.PayloadMemPointerNode) { - auto *SpillPtr = 
CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, Builder.getInt8Ty(), PayloadStorage, + auto *SpillPtr = compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, Builder.getInt8Ty(), PayloadStorage, FirstPayloadMemoryPointerRegister); SpilledPayloadPtr = Builder.CreateLoad(Builder.getPtrTy(lgc::cps::stackAddrSpace), SpillPtr); } @@ -1196,7 +1184,7 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( for (const PAQIndexInterval &Interval : StorageInfo.IndexIntervals) { for (unsigned I = Interval.Begin; I < std::min(Interval.End, MetadataState.getMaxPayloadRegisterCount()); ++I) { // Create backup of the I-th payload register - auto *LoadPtr = CompilerUtils::simplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); + auto *LoadPtr = compilerutils::simplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); auto *OldValue = Builder.CreateLoad(RegTy, LoadPtr); // As long as we keep a 32 bit alignment of all fields, all fields // get disjoint registers, and we should never save a register twice. @@ -1220,7 +1208,7 @@ void LowerRaytracingPipelinePassImpl::restorePayloadRegistersAfterRecursion( for (unsigned I = 0; I < SavedRegisterValues.size(); ++I) { Value *OldValue = SavedRegisterValues[I]; if (OldValue) { - auto *StorePtr = CompilerUtils::simplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); + auto *StorePtr = compilerutils::simplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); Builder.CreateStore(SavedRegisterValues[I], StorePtr); } } @@ -1242,7 +1230,7 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes(FunctionData &Data, Valu // depending on GlobalToLocal. Then, in the actual copy implementation, we // just access the alloca using loads and stores as for payload registers. 
auto InsertPoint = Builder.saveIP(); - Builder.SetInsertPoint(Builder.GetInsertBlock()->getParent()->getEntryBlock().getFirstNonPHI()); + Builder.SetInsertPoint(Builder.GetInsertBlock()->getParent()->getEntryBlock().getFirstNonPHIIt()); auto *InlineHitAttrsAlloc = Builder.CreateAlloca(InlineHitAttrsTy); auto *RegTyPtr = Builder.getPtrTy(InlineHitAttrsAlloc->getAddressSpace()); Builder.restoreIP(InsertPoint); @@ -1286,7 +1274,7 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes(FunctionData &Data, Valu // Assume maximum possible size PayloadHitAttrBytes = MetadataState.getMaxHitAttributeByteCount() - InlineHitAttrsBytes; // Use hit attribute storage at fixed index - PayloadHitAttrs = CompilerUtils::simplifyingCreateConstGEP1_32(Builder, I32, Data.PayloadStorage, + PayloadHitAttrs = compilerutils::simplifyingCreateConstGEP1_32(Builder, I32, Data.PayloadStorage, FirstPayloadHitAttributeStorageRegister); } @@ -1297,13 +1285,13 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes(FunctionData &Data, Valu LocalHitAttributes = Builder.CreateBitCast(LocalHitAttributes, RegTyPtr); auto *I8Ty = Builder.getInt8Ty(); for (unsigned I = 0; I < divideCeil(HitAttrsBytes, RegisterBytes); I++) { - auto *LocalPtr = CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, LocalHitAttributes, I); + auto *LocalPtr = compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, LocalHitAttributes, I); Value *GlobalPtr; if (I < InlineRegSize) - GlobalPtr = CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, InlineHitAttrs, I); + GlobalPtr = compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, InlineHitAttrs, I); else GlobalPtr = - CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, PayloadHitAttrs, I - InlineRegSize); + compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, PayloadHitAttrs, I - InlineRegSize); auto *LoadPtr = GlobalToLocal ? 
GlobalPtr : LocalPtr; auto *StorePtr = GlobalToLocal ? LocalPtr : GlobalPtr; @@ -1315,8 +1303,8 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes(FunctionData &Data, Valu // Load byte by byte into a vector and pad the rest with undef for (unsigned J = 0; J < HitAttrsBytes % RegisterBytes; J++) { auto *Val = - Builder.CreateLoad(I8Ty, CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, LoadPtr, J)); - Builder.CreateStore(Val, CompilerUtils::simplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, StorePtr, J)); + Builder.CreateLoad(I8Ty, compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, LoadPtr, J)); + Builder.CreateStore(Val, compilerutils::simplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, StorePtr, J)); } } } @@ -1379,9 +1367,25 @@ void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { Visitor.visit(State, *Mod); } +void LowerRaytracingPipelinePassImpl::setMaxArgumentVgprsMetadata() { + assert(PayloadStartDword.has_value() && "Payload start dword not set yet"); + + std::optional MaxPayloadRegisterCount = MetadataState.getMaxPayloadRegisterCount(); + assert(MaxPayloadRegisterCount.has_value() && "Missing max payload register count"); + + // We add 1 for the shader index and 1 for the return address, which weren't included in the calculations for the + // payload, and 1 for the continuation stack pointer that will be added in a later pass. 
+ MetadataState.updateMaxArgumentVgprs(*PayloadStartDword + *MaxPayloadRegisterCount + 3); +} + void LowerRaytracingPipelinePassImpl::processContinuations() { I32 = Type::getInt32Ty(*Context); + if (ToProcess.empty()) + return; + + setMaxArgumentVgprsMetadata(); + for (auto &FuncData : ToProcess) { processFunction(FuncData.first, FuncData.second); } @@ -1697,22 +1701,21 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, FunctionData } assert(NumIncomingPayloadDwords.has_value()); + assert(PayloadStartDword.has_value()); Data.PayloadStorageTy = PayloadHelper.getPayloadStorageTy(MetadataState.getMaxPayloadRegisterCount(), Data); - Data.FirstPayloadArgumentDword = - PayloadHelper.getPayloadStartDword(Data, MetadataState.getMaxHitAttributeByteCount(), TraversalDataTy); + if (Data.PayloadStorageTy->getArrayNumElements() != 0) + Data.FirstPayloadArgumentDword = *PayloadStartDword; - if (!Data.isRayGeneration()) { - if (!Data.isAnyHit()) { - // Add a dummy argument for CpsArgIdx::HitAttributes so that the arg index - // of payload matches CpsArgIdx::Payload - AllArgTypes.push_back(StructType::get(*Context, {})); - } - - PayloadHelper.computePaddingAndPayloadArgTys(AllArgTypes, NumIncomingPayloadDwords.value(), - Data.FirstPayloadArgumentDword); + if (!Data.isAnyHit()) { + // Add a dummy argument for CpsArgIdx::HitAttributes so that the arg index + // of payload matches CpsArgIdx::Payload + AllArgTypes.push_back(StructType::get(*Context, {})); } + PayloadHelper.computePaddingAndPayloadArgTys(AllArgTypes, NumIncomingPayloadDwords.value(), + Data.isRayGeneration() ? std::nullopt : Data.FirstPayloadArgumentDword); + // Pass in the shader index and return address arguments so they don't get included in the padding. 
AllArgTypes.insert(AllArgTypes.begin(), {Builder.getInt32Ty(), Builder.getInt32Ty()}); @@ -1722,7 +1725,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, FunctionData // Create new function to change signature auto *NewFuncTy = FunctionType::get(Builder.getVoidTy(), AllArgTypes, false); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); + Function *NewFunc = compilerutils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); NewFunc->takeName(F); // FIXME: Remove !pointeetypes metadata to workaround an llvm bug. If struct types // are referenced only from metadata, LLVM omits the type declaration when @@ -1735,7 +1738,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, FunctionData processFunctionEntry(Data, NewFunc->getArg(CpsArgIdx::SystemData)); // Mark as CPS function with the corresponding level. - CpsLevel Level = getCpsLevelForShaderStage(Data.Kind); + CpsSchedulingLevel Level = getCpsLevelForShaderStage(Data.Kind); setCpsFunctionLevel(*NewFunc, Level); if (!Data.isRayGeneration()) { @@ -2109,8 +2112,6 @@ void LowerRaytracingPipelinePassImpl::handleAmdInternalFunc(Function &Func) { if (FuncName.starts_with("_AmdGetFuncAddr")) { ContHelper::handleGetFuncAddr(Func, Builder); - } else if (FuncName.starts_with("_AmdGetShaderKind")) { - handleGetShaderKind(Func); } else if (FuncName.starts_with("_AmdGetCurrentFuncAddr")) { handleGetCurrentFuncAddr(Func); } @@ -2232,7 +2233,7 @@ void LowerRaytracingPipelinePassImpl::determineDispatchSystemDataType() { /// Try to find the scheduler function on the GPURT module. Extract the arguments struct type, create a new one extended /// by the maximum number of hit attribute and payload dwords and update the pointee type on the scheduler. 
void LowerRaytracingPipelinePassImpl::extendArgumentStruct() { - Function *SchedulerFunc = GpurtLibrary->getFunction(ContDriverFunc::SchedulerName); + Function *SchedulerFunc = Mod->getFunction(ContDriverFunc::SchedulerName); if (!SchedulerFunc) return; @@ -2339,8 +2340,11 @@ PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { // Find the traversal system data type by looking at the argument to // ReportHit. TraversalDataTy = nullptr; - if (ReportHit) + if (ReportHit) { TraversalDataTy = getFuncArgPtrElementType(ReportHit, 0); + PayloadStartDword = + PayloadHelper.getPayloadStartDword(MetadataState.getMaxHitAttributeByteCount(), TraversalDataTy); + } HitMissDataTy = nullptr; if (auto *HitKind = GpurtLibrary->getFunction(ContDriverFunc::HitKindName)) { HitMissDataTy = getFuncArgPtrElementType(HitKind, 0); diff --git a/llvmraytracing/lib/RematSupport.cpp b/llvmraytracing/lib/RematSupport.cpp index f6f354f4dc..cfc3976c1a 100644 --- a/llvmraytracing/lib/RematSupport.cpp +++ b/llvmraytracing/lib/RematSupport.cpp @@ -183,9 +183,9 @@ static bool commonMaterializable(Instruction &Inst) { // Insert into constant. 
if (isa(Inst)) { Instruction *FirstInsert = nullptr; - if (FirstInsert = dyn_cast(&Inst)) + if ((FirstInsert = dyn_cast(&Inst))) FirstInsert = TrackSequenceInsert(FirstInsert); - else if (FirstInsert = dyn_cast(&Inst)) + else if ((FirstInsert = dyn_cast(&Inst))) FirstInsert = TrackSequenceInsert(FirstInsert); if (isa(FirstInsert->getOperand(0))) diff --git a/llvmraytracing/lib/SpecializeDriverShaders.cpp b/llvmraytracing/lib/SpecializeDriverShaders.cpp index 4166d5d385..62f1f41b3d 100644 --- a/llvmraytracing/lib/SpecializeDriverShaders.cpp +++ b/llvmraytracing/lib/SpecializeDriverShaders.cpp @@ -39,7 +39,7 @@ #include using namespace llvm; -using namespace CompilerUtils; +using namespace compilerutils; #define DEBUG_TYPE "specialize-driver-shaders" #ifndef NDEBUG @@ -926,14 +926,14 @@ struct SpecializeDriverShadersPassImpl { // Initialize a new value origin tracker for the current function. // Move AwaitOriginAssumptions into the VOT to prevent a copy, and reset the optional // to prevent unintended accesses. - CompilerUtils::ValueOriginTracker::Options Opts{}; + compilerutils::ValueOriginTracker::Options Opts{}; Opts.BytesPerSlice = ArgSlotSizeInBytes; Opts.MaxBytesPerValue = MaxNumAnalyzedArgSlots * ArgSlotSizeInBytes; // Handle freeze poison conservatively. Optimizing based on it requires to replace affected freeze poison // by something else (e.g. zeroinitializer), which means we'd need to change app shaders and not just // Traversal. As of now, in tests it didn't make a difference. 
- Opts.FreezeMode = CompilerUtils::ValueOriginTracker::Options::FreezeHandlingMode::Dynamic; - CompilerUtils::ValueOriginTracker VOT{DL, Opts, std::move(*ToBePreservedInputArgsInfo.AwaitOriginAssumptions)}; + Opts.FreezeMode = compilerutils::ValueOriginTracker::Options::FreezeHandlingMode::Dynamic; + compilerutils::ValueOriginTracker VOT{DL, Opts, std::move(*ToBePreservedInputArgsInfo.AwaitOriginAssumptions)}; ToBePreservedInputArgsInfo.AwaitOriginAssumptions.reset(); // Do a bulk value origin analysis on all relevant outgoing args. This is more efficient than individual @@ -991,7 +991,7 @@ struct SpecializeDriverShadersPassImpl { unsigned NumReplacedDwords; }; - using ValueSpecializer = CompilerUtils::ValueSpecializer; + using ValueSpecializer = compilerutils::ValueSpecializer; SpecializeArgResult specializeArgument(const ArgSlotsInfo &SpecializationInfo, ValueSpecializer &VS, Argument *Arg, const ArgumentLayoutInfo &ArgumentLayoutInfo, unsigned GlobalArgSlotBegin) { diff --git a/llvmraytracing/test/CMakeLists.txt b/llvmraytracing/test/CMakeLists.txt index d0a3231c45..01d83b8f84 100644 --- a/llvmraytracing/test/CMakeLists.txt +++ b/llvmraytracing/test/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -50,3 +50,4 @@ add_lit_testsuites(LLVMRAYTRACING ${CMAKE_CURRENT_SOURCE_DIR} ${exclude_from_check_all} DEPENDS ${LLVMRAYTRACING_TEST_DEPENDS} ) + diff --git a/llvmraytracing/test/intrinsics/discard-values.ll b/llvmraytracing/test/intrinsics/discard-values.ll index e0e7c3810b..26037e5c3a 100644 --- a/llvmraytracing/test/intrinsics/discard-values.ll +++ b/llvmraytracing/test/intrinsics/discard-values.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt --verify-each -passes='dxil-cont-prepare-gpurt-library,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.AnyHitData = type { float, i32 } %struct.DispatchSystemData = type { i32 } diff --git a/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll b/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll index 2a935355e9..a42c8cccf6 100644 --- a/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll +++ b/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll @@ -1,4 +1,29 @@ ; NOTE: Do not autogenerate +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; RUN: not --crash opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error 2>&1 | FileCheck %s ; CHECK: ERROR: Did not find function '' requested by _AmdGetFuncAddr diff --git a/llvmraytracing/test/intrinsics/get-func-addr.ll b/llvmraytracing/test/intrinsics/get-func-addr.ll index 57786c8169..a05559a9fd 100644 --- a/llvmraytracing/test/intrinsics/get-func-addr.ll +++ b/llvmraytracing/test/intrinsics/get-func-addr.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.DispatchSystemData = type { i32 } @@ -17,14 +41,14 @@ define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwin define { i32, i32 } @main() !lgc.rt.shaderstage !10 { ; CHECK-LABEL: define void @main -; CHECK-SAME: (i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META10:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META11:![0-9]+]] { +; CHECK-SAME: (i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], {} [[TMP1:%.*]], [0 x i32] [[TMP2:%.*]], [0 x i32] [[TMP3:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META12:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: call void @lgc.ilcps.setLocalRootIndex(i32 0) -; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference(ptr @MyFunc) -; CHECK-NEXT: [[V0:%.*]] = insertvalue { i32, i32 } undef, i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference(ptr @MyFunc) +; CHECK-NEXT: [[V0:%.*]] = insertvalue { i32, i32 } undef, i32 [[TMP4]], 0 ; CHECK-NEXT: call void @lgc.cps.complete() ; CHECK-NEXT: unreachable ; diff --git a/llvmraytracing/test/intrinsics/shader-start.ll b/llvmraytracing/test/intrinsics/shader-start.ll index f1b6d9f72d..54346e1c9f 100644 --- a/llvmraytracing/test/intrinsics/shader-start.ll +++ b/llvmraytracing/test/intrinsics/shader-start.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.DispatchSystemData = type { i32 } %struct.SystemData = type { %struct.DispatchSystemData } @@ -13,7 +37,7 @@ declare !pointeetys !{%struct.DispatchSystemData poison} void @_cont_DispatchRay define void @main() !lgc.rt.shaderstage !10 { ; CHECK-LABEL: define void @main( -; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META7:![0-9]+]] { +; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META6:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META8:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 @@ -31,7 +55,7 @@ entry: define void @_cont_ShaderStart(%struct.DispatchSystemData* %data) !pointeetys !11 { ; CHECK-LABEL: define void @_cont_ShaderStart( -; CHECK-SAME: ptr [[DATA:%.*]]) !pointeetys [[META4:![0-9]+]] { +; CHECK-SAME: ptr [[DATA:%.*]]) !pointeetys [[META5:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 ; CHECK-NEXT: store i32 123, ptr [[TMP0]], align 4 @@ -61,8 +85,8 @@ entry: !15 = !{%struct.SystemData poison} ;. 
; CHECK: [[META0]] = !{i32 30} -; CHECK: [[META4]] = !{%struct.DispatchSystemData poison} -; CHECK: [[META5]] = !{i32 1} -; CHECK: [[META6]] = !{i32 5} -; CHECK: [[META7]] = !{ptr @main} +; CHECK: [[META5]] = !{%struct.DispatchSystemData poison} +; CHECK: [[META6]] = !{i32 1} +; CHECK: [[META7]] = !{i32 5} +; CHECK: [[META8]] = !{ptr @main} ;. diff --git a/llvmraytracing/test/lgccps/CpsLowering/continuation-basic.ll b/llvmraytracing/test/lgccps/CpsLowering/continuation-basic.ll index f4e0c9d3ce..22352adb78 100644 --- a/llvmraytracing/test/lgccps/CpsLowering/continuation-basic.ll +++ b/llvmraytracing/test/lgccps/CpsLowering/continuation-basic.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @lgc.cps.jump(...) noreturn diff --git a/llvmraytracing/test/lgccps/CpsLowering/cps-entry-point.ll b/llvmraytracing/test/lgccps/CpsLowering/cps-entry-point.ll index 04ec21efc4..0bb4342d15 100644 --- a/llvmraytracing/test/lgccps/CpsLowering/cps-entry-point.ll +++ b/llvmraytracing/test/lgccps/CpsLowering/cps-entry-point.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @lgc.cps.jump(...) #0 diff --git a/llvmraytracing/test/lgccps/CpsLowering/cps-from-continufy.ll b/llvmraytracing/test/lgccps/CpsLowering/cps-from-continufy.ll index 7ef8bd6050..bddd931464 100644 --- a/llvmraytracing/test/lgccps/CpsLowering/cps-from-continufy.ll +++ b/llvmraytracing/test/lgccps/CpsLowering/cps-from-continufy.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %_rgen_1.Frame = type { ptr addrspace(7), ptr addrspace(7), i32 } diff --git a/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering-spirv-global.ll b/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering-spirv-global.ll new file mode 100644 index 0000000000..c7d96ae3af --- /dev/null +++ b/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering-spirv-global.ll @@ -0,0 +1,268 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -o - -passes='cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. 
All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%_rgen_1.Frame = type { ptr addrspace(1), ptr addrspace(1), i32 } + +declare void @lgc.cps.jump(...) 
#0 + +declare ptr addrspace(32) @lgc.cps.alloc(i32) + +declare void @lgc.cps.free(i32) + +declare i32 @lgc.cps.as.continuation.reference(ptr) + +declare ptr addrspace(32) @lgc.cps.peek(i32) + +declare ptr addrspace(32) @lgc.cps.get.vsp() + +declare i32 @lgc.cps.get.dummy.index(i32) + +declare void @lgc.cps.complete() + +declare i64 @_cont_GetContinuationStackGlobalMemBase() + +define { ptr, ptr } @test.0(ptr %0) !lgc.cps !1 !lgc.rt.shaderstage !2 !continuation !3 { +; CHECK-LABEL: define void @test.0( +; CHECK-SAME: ) !lgc.cps [[META0:![0-9]+]] !lgc.rt.shaderstage [[META1:![0-9]+]] !continuation [[META2:![0-9]+]] !continuation.state [[META3:![0-9]+]] { +; CHECK-NEXT: [[ALLOCASPILLBB:.*:]] +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 12 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP3]], i32 [[TMP0]] +; CHECK-NEXT: store i32 333, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP3]], i32 [[TMP4]] +; CHECK-NEXT: store i32 111, ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], 9 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP3]], i32 [[TMP7]] +; CHECK-NEXT: store i8 99, ptr addrspace(1) [[TMP8]], align 1 +; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, i32 [[TMP10]], i32 poison, i32 6, i32 [[TMP7]], i32 [[TMP4]]) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) + store i32 333, ptr addrspace(32) %mem, align 4 + %p1 = getelementptr i32, ptr addrspace(32) %mem, i32 1 + store i32 111, ptr addrspace(32) %p1, align 4 + %p2 = getelementptr i8, ptr addrspace(32) %mem, i32 9 + store i8 99, ptr addrspace(32) %p2, align 1 + %q1 = ptrtoint ptr addrspace(32) %p1 to i32 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, i32 poison, i32 poison, i32 6, ptr addrspace(32) %p2, i32 %q1) + unreachable +} + +define { ptr, ptr } @test.1(ptr addrspace(32) %p2, i32 %q1, ptr %0) !lgc.cps !1 !lgc.rt.shaderstage !2 !continuation !4 { +; CHECK-LABEL: define void @test.1( +; CHECK-SAME: i32 [[P2:%.*]], i32 [[Q1:%.*]]) !lgc.cps [[META0]] !lgc.rt.shaderstage [[META1]] !continuation [[META4:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: [[ALLOCASPILLBB:.*:]] +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i32 [[Q1]] +; CHECK-NEXT: [[N111:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP1]], i32 [[P2]] +; CHECK-NEXT: [[N99:%.*]] = load i8, ptr addrspace(1) [[TMP3]], align 1 +; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @test.2) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, i32 [[TMP4]], i32 poison, i32 poison) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %p1 = inttoptr i32 %q1 to ptr addrspace(32) + %n111 = load i32, ptr addrspace(32) %p1, align 4 + %n99 = load i8, ptr addrspace(32) %p2, align 1 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.2) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, i32 poison, i32 poison, i32 poison) + unreachable +} + +define { ptr, ptr } @test.2(ptr %0) !lgc.cps !1 !lgc.rt.shaderstage !2 !continuation !5 { +; CHECK-LABEL: define void @test.2( +; CHECK-SAME: ) !lgc.cps [[META0]] !lgc.rt.shaderstage [[META1]] !continuation [[META5:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: [[ALLOCASPILLBB:.*:]] +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], -12 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP3]], i32 [[TMP1]] +; CHECK-NEXT: [[N333:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -12 +; CHECK-NEXT: store i32 [[TMP5]], ptr [[CSP]], align 4 +; CHECK-NEXT: ret void +; +AllocaSpillBB: + %mem = call ptr addrspace(32) @lgc.cps.peek(i32 10) + %n333 = load i32, ptr addrspace(32) %mem, align 4 + call void @lgc.cps.free(i32 10) + call void @lgc.cps.complete() + unreachable +} + +define { ptr, ptr } @test.gep(ptr %0) !lgc.cps !1 !lgc.rt.shaderstage !2 !continuation !6 { +; CHECK-LABEL: define void @test.gep( +; CHECK-SAME: ) !lgc.cps [[META0]] !lgc.rt.shaderstage [[META1]] !continuation [[META6:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: [[ALLOCASPILLBB:.*:]] +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 
@_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 12 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[STACK_EL0:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[STACK_EL0]], 24 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP3]] +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[TMP11]], align 4 +; CHECK-NEXT: [[STACK_EL1:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 1) +; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[STACK_EL1]], 24 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP8]] +; CHECK-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[STACK_EL2:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 2) +; CHECK-NEXT: [[STACK_EL2_DIV:%.*]] = sdiv i32 [[STACK_EL2]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[STACK_EL2_DIV]], 24 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], -8 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP6]], i32 [[TMP15]] +; CHECK-NEXT: store i32 [[TMP17]], ptr addrspace(1) [[TMP18]], align 4 +; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, i32 [[TMP20]], i32 poison, i32 5, i32 [[TMP17]], i32 [[TMP17]]) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 + %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() + %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 + store i32 %vsp.i, ptr addrspace(32) %1, align 4 + %stack.el1 = call i32 @lgc.cps.get.dummy.index(i32 1) + %2 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el1 + %vsp.2 = call ptr addrspace(32) @lgc.cps.peek(i32 4) + %vsp.2.i = ptrtoint ptr addrspace(32) %vsp.2 to i32 + store i32 %vsp.2.i, ptr addrspace(32) %2, align 4 + %stack.el2 = call i32 @lgc.cps.get.dummy.index(i32 2) + %stack.el2.div = sdiv i32 %stack.el2, 2 + %3 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el2.div, i32 1 + %vsp.3 = call ptr addrspace(32) @lgc.cps.peek(i32 8) + %vsp.3.i = ptrtoint ptr addrspace(32) %vsp.3 to i32 + store i32 %vsp.3.i, ptr addrspace(32) %3, align 4 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + call void (...) 
@lgc.cps.jump(i32 %cr, i32 2, i32 poison, i32 poison, i32 5, ptr addrspace(32) %vsp.3, i32 %vsp.3.i) + unreachable +} + +define { ptr, ptr } @test.nested.gep(ptr %0) !lgc.cps !1 !lgc.rt.shaderstage !2 !continuation !7 { +; CHECK-LABEL: define void @test.nested.gep( +; CHECK-SAME: ) !lgc.cps [[META0]] !lgc.rt.shaderstage [[META1]] !continuation [[META7:![0-9]+]] !continuation.state [[META3]] { +; CHECK-NEXT: [[ALLOCASPILLBB:.*:]] +; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 12 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[CSP]], align 4 +; CHECK-NEXT: [[STACK_EL0:%.*]] = call i32 @lgc.cps.get.dummy.index(i32 0) +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[STACK_EL0]], 24 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP7]], i32 [[TMP4]] +; CHECK-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP9]], align 4 +; CHECK-NEXT: [[CR:%.*]] = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[CR]], i32 2, i32 [[TMP8]], i32 poison, i32 4, i32 [[TMP5]], i32 [[TMP5]]) +; CHECK-NEXT: unreachable +; +AllocaSpillBB: + %mem = call ptr addrspace(32) @lgc.cps.alloc(i32 10) + %stack.el0 = call i32 @lgc.cps.get.dummy.index(i32 0) + %gep.base = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %mem, i32 %stack.el0 + %1 = getelementptr inbounds %_rgen_1.Frame, ptr addrspace(32) %gep.base, i32 0, i32 2 + %vsp = call ptr addrspace(32) @lgc.cps.get.vsp() + %vsp.i = ptrtoint ptr addrspace(32) %vsp to i32 + store i32 %vsp.i, ptr addrspace(32) %1, align 4 + %cr = call i32 @lgc.cps.as.continuation.reference(ptr @test.1) + call void (...) @lgc.cps.jump(i32 %cr, i32 2, i32 poison, i32 poison, i32 4, ptr addrspace(32) %vsp, i32 %vsp.i) + unreachable +} + +declare !continuation !3 { ptr, ptr } @continuation.prototype.test.0(ptr, i1) + +declare ptr @continuation.malloc(i32) + +declare void @continuation.free(ptr) + +declare token @llvm.coro.id.retcon(i32, i32, ptr, ptr, ptr, ptr) #1 + +declare ptr @llvm.coro.begin(token, ptr writeonly) #1 + +declare !continuation !4 { ptr, ptr } @continuation.prototype.test.1(ptr, i1) + +declare !continuation !5 { ptr, ptr } @continuation.prototype.test.2(ptr, i1) + +declare !continuation !6 { ptr, ptr } @continuation.prototype.test.gep(ptr, i1) + +declare !continuation !7 { ptr, ptr } @continuation.prototype.test.nested.gep(ptr, i1) + +attributes #0 = { noreturn } +attributes #1 = { nounwind } + +!continuation.stackAddrspace = !{!0} + +!0 = !{i32 1} +!1 = !{i32 1} +!2 = !{i32 7} +!3 = !{ptr @test.0} +!4 = !{ptr @test.1} +!5 = !{ptr @test.2} +!6 = !{ptr @test.gep} +!7 = !{ptr @test.nested.gep} +;. +; CHECK: [[META0]] = !{i32 1} +; CHECK: [[META1]] = !{i32 7} +; CHECK: [[META2]] = !{ptr @test.0} +; CHECK: [[META3]] = !{i32 0} +; CHECK: [[META4]] = !{ptr @test.1} +; CHECK: [[META5]] = !{ptr @test.2} +; CHECK: [[META6]] = !{ptr @test.gep} +; CHECK: [[META7]] = !{ptr @test.nested.gep} +;. 
diff --git a/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering.ll b/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering.ll index b46e141633..4492a72a6a 100644 --- a/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering.ll +++ b/llvmraytracing/test/lgccps/CpsLowering/cps-stack-lowering.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @lgc.cps.jump(...) noreturn declare ptr addrspace(32) @lgc.cps.alloc(i32) diff --git a/llvmraytracing/test/lgccps/CpsLowering/cps-unify-exits.ll b/llvmraytracing/test/lgccps/CpsLowering/cps-unify-exits.ll index 7251051390..75300012d1 100644 --- a/llvmraytracing/test/lgccps/CpsLowering/cps-unify-exits.ll +++ b/llvmraytracing/test/lgccps/CpsLowering/cps-unify-exits.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s -S | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare void @lgc.cps.jump(...) noreturn diff --git a/llvmraytracing/test/lgccps/alloca-select.ll b/llvmraytracing/test/lgccps/alloca-select.ll index 8088266d88..be77a198c6 100644 --- a/llvmraytracing/test/lgccps/alloca-select.ll +++ b/llvmraytracing/test/lgccps/alloca-select.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 2 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) diff --git a/llvmraytracing/test/lgccps/await-if-else.ll b/llvmraytracing/test/lgccps/await-if-else.ll index afe3a51bb0..863da84d7e 100644 --- a/llvmraytracing/test/lgccps/await-if-else.ll +++ b/llvmraytracing/test/lgccps/await-if-else.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) declare !lgc.cps !0 void @callee2({}, i32, float) diff --git a/llvmraytracing/test/lgccps/await-if.ll b/llvmraytracing/test/lgccps/await-if.ll index 7b5dec9968..6b0502f558 100644 --- a/llvmraytracing/test/lgccps/await-if.ll +++ b/llvmraytracing/test/lgccps/await-if.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) diff --git a/llvmraytracing/test/lgccps/await-in-loop.ll b/llvmraytracing/test/lgccps/await-in-loop.ll index d4ba4a6af3..030248ddbb 100644 --- a/llvmraytracing/test/lgccps/await-in-loop.ll +++ b/llvmraytracing/test/lgccps/await-in-loop.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, i32) diff --git a/llvmraytracing/test/lgccps/call-shader-i1-payload.ll b/llvmraytracing/test/lgccps/call-shader-i1-payload.ll index 3026e91ca6..1e08f5a2cc 100644 --- a/llvmraytracing/test/lgccps/call-shader-i1-payload.ll +++ b/llvmraytracing/test/lgccps/call-shader-i1-payload.ll @@ -1,6 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-raytracing-pipeline' %s | FileCheck --check-prefixes=LOWER-RAYTRACING-PIPELINE %s ; RUN: opt --verify-each -S -o - -passes='lower-raytracing-pipeline,sroa' %s | FileCheck --check-prefixes=SROA %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; The test checks the payload alloca is fully written and be promoted to register successfully. 
@@ -54,7 +78,7 @@ attributes #1 = { nounwind willreturn memory(argmem: readwrite, inaccessiblemem: !8 = !{!"function", i1 poison, !7, float poison, i32 poison} ; LOWER-RAYTRACING-PIPELINE-LABEL: define void @called( -; LOWER-RAYTRACING-PIPELINE-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META4:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation.registercount [[META1]] !continuation [[META5:![0-9]+]] { +; LOWER-RAYTRACING-PIPELINE-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation.registercount [[META1]] !continuation [[META6:![0-9]+]] { ; LOWER-RAYTRACING-PIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [2 x i32], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 @@ -104,7 +128,7 @@ attributes #1 = { nounwind willreturn memory(argmem: readwrite, inaccessiblemem: ; ; ; SROA-LABEL: define void @called( -; SROA-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META4:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation.registercount [[META1]] !continuation [[META5:![0-9]+]] { +; SROA-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation.registercount [[META1]] !continuation 
[[META6:![0-9]+]] { ; SROA-NEXT: [[DOTSROA_5:%.*]] = alloca i8, align 4 ; SROA-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 0 ; SROA-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 1 diff --git a/llvmraytracing/test/lgccps/cleanup-store-loads.ll b/llvmraytracing/test/lgccps/cleanup-store-loads.ll index 687e1e8144..ac436561d4 100644 --- a/llvmraytracing/test/lgccps/cleanup-store-loads.ll +++ b/llvmraytracing/test/lgccps/cleanup-store-loads.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='cgscc(inline),cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) diff --git a/llvmraytracing/test/lgccps/cps-no-await.ll b/llvmraytracing/test/lgccps/cps-no-await.ll index 8901e3363e..16f517f253 100644 --- a/llvmraytracing/test/lgccps/cps-no-await.ll +++ b/llvmraytracing/test/lgccps/cps-no-await.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await' %s | FileCheck --check-prefixes=LOWER-AWAIT %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; define void @_cont_Traversal() !lgc.cps !{i32 2} !continuation !{ptr @_cont_Traversal} { %pushconst = call ptr addrspace(4) @lgc.user.data(i32 32) diff --git a/llvmraytracing/test/lgccps/entry-point-with-cps.ll b/llvmraytracing/test/lgccps/entry-point-with-cps.ll index 36b33d51c0..d147d651bd 100644 --- a/llvmraytracing/test/lgccps/entry-point-with-cps.ll +++ b/llvmraytracing/test/lgccps/entry-point-with-cps.ll @@ -1,6 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s ; RUN: opt --verify-each -S -o - -passes='lower-await' %s | FileCheck --check-prefixes=LOWER-AWAIT %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This is example output for running continufy on the -in file. 
; Details of the output are likely to differ from the final production pass, diff --git a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-get-i32.ll b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-get-i32.ll index f952e442b7..360d12718f 100644 --- a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-get-i32.ll +++ b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-get-i32.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.AnyHitTraversalData = type { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %struct.DispatchSystemData = type { i32 } @@ -16,7 +40,7 @@ declare i32 @_AmdContPayloadRegistersGetI32(i32) define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal({ i32 } %0) local_unnamed_addr !lgc.shaderstage !0 !pointeetys !1 !lgc.rt.shaderstage !3 { ; CHECK-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META3:![0-9]+]] !lgc.rt.shaderstage [[META4:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] { +; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { i32 }, align 8 ; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 diff --git a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-i32-count.ll b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-i32-count.ll index e4ad0cb4d3..bb70a1cec1 100644 --- a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-i32-count.ll +++ b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-i32-count.ll @@ -1,5 +1,29 
@@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.AnyHitTraversalData = type { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %struct.DispatchSystemData = type { i32 } @@ -16,7 +40,7 @@ declare i32 @_AmdContPayloadRegistersI32Count() define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal({ i32 } %0) local_unnamed_addr !lgc.shaderstage !0 !pointeetys !1 !lgc.rt.shaderstage !3 { ; CHECK-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [11 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.registercount [[META1:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] { +; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [11 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META6:![0-9]+]] !continuation.registercount [[META1:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation [[META8:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { i32 }, align 8 ; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [11 x i32], align 4 diff --git a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-set-i32.ll b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-set-i32.ll index 82f9d4bb4d..8cde479a23 100644 --- a/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-set-i32.ll +++ b/llvmraytracing/test/lgccps/intrinsics/cont-payload-registers-set-i32.ll @@ -1,5 +1,29 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.AnyHitTraversalData = type { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } %struct.DispatchSystemData = type { i32 } @@ -14,7 +38,7 @@ declare void @_AmdContPayloadRegistersSetI32(i32, i32) define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal({ i32 } %0) local_unnamed_addr !lgc.shaderstage !0 !pointeetys !1 !lgc.rt.shaderstage !3 { ; CHECK-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META3:![0-9]+]] !lgc.rt.shaderstage [[META4:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] { +; CHECK-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { i32 }, align 8 ; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 diff --git a/llvmraytracing/test/lgccps/multiple-await.ll b/llvmraytracing/test/lgccps/multiple-await.ll index 14e74d6e53..a9709e6d91 100644 --- a/llvmraytracing/test/lgccps/multiple-await.ll +++ b/llvmraytracing/test/lgccps/multiple-await.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) declare !lgc.cps !0 void @callee2({}, i32, float) diff --git a/llvmraytracing/test/lgccps/simple-await-more-state.ll b/llvmraytracing/test/lgccps/simple-await-more-state.ll index 210ed246b7..ebb3645616 100644 --- a/llvmraytracing/test/lgccps/simple-await-more-state.ll +++ b/llvmraytracing/test/lgccps/simple-await-more-state.ll @@ -1,5 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) diff --git a/llvmraytracing/test/lgccps/simple-await.ll b/llvmraytracing/test/lgccps/simple-await.ll index a1b37422cc..7586df1676 100644 --- a/llvmraytracing/test/lgccps/simple-await.ll +++ b/llvmraytracing/test/lgccps/simple-await.ll @@ -1,6 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -S -o - -passes='lower-await,coro-early,lgc-coro-split,coro-cleanup,cleanup-continuations' %s | FileCheck --check-prefixes=CHECK %s ; RUN: opt --verify-each -S -o - -passes='lower-await' %s | FileCheck --check-prefixes=LOWER-AWAIT %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. 
+ ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. + ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; declare !lgc.cps !0 void @callee({}, i32, float) diff --git a/llvmraytracing/test/lgccps/traversal-padding-hitattr-size.ll b/llvmraytracing/test/lgccps/traversal-padding-hitattr-size.ll index 253b3a966c..d7416f3f66 100644 --- a/llvmraytracing/test/lgccps/traversal-padding-hitattr-size.ll +++ b/llvmraytracing/test/lgccps/traversal-padding-hitattr-size.ll @@ -2,6 +2,30 @@ ; We run this test file twice with different max hit attribute sizes to test that e.g. padding depends correctly on the max hit attribute size. ; RUN: grep -v HITATTR_SIZE_8 %s | opt --verify-each -passes='lower-raytracing-pipeline,lint' -S --lint-abort-on-error | FileCheck -check-prefix=CHECK-ATTRSIZE-16 %s ; RUN: grep -v HITATTR_SIZE_16 %s | opt --verify-each -passes='lower-raytracing-pipeline,lint' -S --lint-abort-on-error | FileCheck -check-prefix=CHECK-ATTRSIZE-8 %s +;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; + ; Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ ; + ; Permission is hereby granted, free of charge, to any person obtaining a copy + ; of this software and associated documentation files (the "Software"), to + ; deal in the Software without restriction, including without limitation the + ; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + ; sell copies of the Software, and to permit persons to whom the Software is + ; furnished to do so, subject to the following conditions: + ; + ; The above copyright notice and this permission notice shall be included in all + ; copies or substantial portions of the Software. + ; + ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + ; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + ; IN THE SOFTWARE. 
+ ; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %struct.AnyHitTraversalData = type { i32 } %struct.DispatchSystemData = type { i32 } @@ -16,7 +40,7 @@ declare i32 @_AmdGetCurrentFuncAddr() define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal({ i32 } %0) local_unnamed_addr !lgc.shaderstage !0 !pointeetys !1 !lgc.rt.shaderstage !2 { ; CHECK-ATTRSIZE-16-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-ATTRSIZE-16-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [4 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META6:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation [[META8:![0-9]+]] { +; CHECK-ATTRSIZE-16-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [4 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META6:![0-9]+]] !lgc.rt.shaderstage [[META7:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META8:![0-9]+]] !continuation [[META9:![0-9]+]] { ; CHECK-ATTRSIZE-16-NEXT: [[_ENTRY:.*:]] ; CHECK-ATTRSIZE-16-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { i32 }, align 8 ; CHECK-ATTRSIZE-16-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [8 x i32], align 4 @@ -44,7 +68,7 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-16-NEXT: unreachable ; ; CHECK-ATTRSIZE-8-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-ATTRSIZE-8-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [2 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] 
!continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] { +; CHECK-ATTRSIZE-8-SAME: i32 [[SHADERINDEX:%.*]], i32 [[RETURNADDR:%.*]], { i32 } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [2 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META6:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation [[META8:![0-9]+]] { ; CHECK-ATTRSIZE-8-NEXT: [[_ENTRY:.*:]] ; CHECK-ATTRSIZE-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { i32 }, align 8 ; CHECK-ATTRSIZE-8-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [8 x i32], align 4 @@ -110,14 +134,14 @@ declare void @lgc.cps.jump(...) local_unnamed_addr !6 = !{%struct.DispatchSystemData poison} ;. ; CHECK-ATTRSIZE-16: [[META0]] = !{i32 8} -; CHECK-ATTRSIZE-16: [[META5]] = !{i32 7} -; CHECK-ATTRSIZE-16: [[META6]] = !{i32 6} -; CHECK-ATTRSIZE-16: [[META7]] = !{i32 3} -; CHECK-ATTRSIZE-16: [[META8]] = !{ptr @_cont_Traversal} +; CHECK-ATTRSIZE-16: [[META6]] = !{i32 7} +; CHECK-ATTRSIZE-16: [[META7]] = !{i32 6} +; CHECK-ATTRSIZE-16: [[META8]] = !{i32 3} +; CHECK-ATTRSIZE-16: [[META9]] = !{ptr @_cont_Traversal} ;. ; CHECK-ATTRSIZE-8: [[META0]] = !{i32 8} -; CHECK-ATTRSIZE-8: [[META4]] = !{i32 7} -; CHECK-ATTRSIZE-8: [[META5]] = !{i32 6} -; CHECK-ATTRSIZE-8: [[META6]] = !{i32 3} -; CHECK-ATTRSIZE-8: [[META7]] = !{ptr @_cont_Traversal} +; CHECK-ATTRSIZE-8: [[META5]] = !{i32 7} +; CHECK-ATTRSIZE-8: [[META6]] = !{i32 6} +; CHECK-ATTRSIZE-8: [[META7]] = !{i32 3} +; CHECK-ATTRSIZE-8: [[META8]] = !{ptr @_cont_Traversal} ;. 
diff --git a/llvmraytracing/test/lit.cfg.py b/llvmraytracing/test/lit.cfg.py index 99c6ef3f43..2b2aa8b4b4 100644 --- a/llvmraytracing/test/lit.cfg.py +++ b/llvmraytracing/test/lit.cfg.py @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + # -*- Python -*- # Configuration file for the 'lit' test runner. 
diff --git a/llvmraytracing/test/lit.site.cfg.py.in b/llvmraytracing/test/lit.site.cfg.py.in index c918100fbc..5989d43ff5 100644 --- a/llvmraytracing/test/lit.site.cfg.py.in +++ b/llvmraytracing/test/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys diff --git a/llvmraytracing/unittests/CMakeLists.txt b/llvmraytracing/unittests/CMakeLists.txt index b9b62d3f95..5a6c179eb3 100644 --- a/llvmraytracing/unittests/CMakeLists.txt +++ b/llvmraytracing/unittests/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -65,3 +65,4 @@ target_link_libraries(LlvmRaytracingUnitTargetTests PRIVATE ) set_compiler_options(LlvmRaytracingUnitTargetTests) + diff --git a/llvmraytracing/unittests/lit.cfg.py b/llvmraytracing/unittests/lit.cfg.py index 5ddc2e1474..728e631c2d 100644 --- a/llvmraytracing/unittests/lit.cfg.py +++ b/llvmraytracing/unittests/lit.cfg.py @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + # Configuration file for the 'lit' test runner for unit tests. Based on the MLIR unit test config. import os diff --git a/llvmraytracing/unittests/lit.site.cfg.py.in b/llvmraytracing/unittests/lit.site.cfg.py.in index 8d6e567a9c..53547dbdcf 100644 --- a/llvmraytracing/unittests/lit.site.cfg.py.in +++ b/llvmraytracing/unittests/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys diff --git a/sharedme/xdl/CMakeLists.txt b/sharedme/xdl/CMakeLists.txt new file mode 100644 index 0000000000..95821b8e11 --- /dev/null +++ b/sharedme/xdl/CMakeLists.txt @@ -0,0 +1,93 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + +cmake_minimum_required(VERSION 3.21) + +project(SharedMeXdl LANGUAGES CXX) + +set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../..") +include("${LLPC_SOURCE_DIR}/cmake/CompilerFlags.cmake") + +add_llvm_library(sharedme_xdl + lib/LgcXdlDialect.cpp + util/ElementType.cpp + + DEPENDS + intrinsics_gen + + LINK_COMPONENTS + Analysis + CompilerUtils + Core + Scalar + Support + TransformUtils +) + +target_include_directories(sharedme_xdl PUBLIC + $ + $ + $ + $ +) + +target_link_libraries(sharedme_xdl PUBLIC llvm_dialects ${extra_llvm_libs} llpc_version) +set_compiler_options(sharedme_xdl) + +set(SHAREDME_XDL_TABLEGEN_DEFINES) + +# TableGen for dialects +include("${LLPC_SOURCE_DIR}/cmake/DialectsTablegen.cmake") +set_dialects_tablegen_exe(SHAREDME_XDL) + +macro(sharedme_xdl_tablegen DIALECTNAME FILE OUTPUT_FILENAME) + set(LLVM_TARGET_DEFINITIONS "${FILE}") + set(TBLGEN_TARGET "${OUTPUT_FILENAME}TableGen") + + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include/lgc") + tablegen(SHAREDME_XDL "include/lgc/${OUTPUT_FILENAME}.h.inc" + -gen-dialect-decls --dialect "${DIALECTNAME}" "${SHAREDME_XDL_TABLEGEN_DEFINES}" + EXTRA_INCLUDES + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${LLPC_SOURCE_DIR}/imported/llvm-dialects/include + ) + file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include/lib") + tablegen(SHAREDME_XDL "include/lib/${OUTPUT_FILENAME}.cpp.inc" + -gen-dialect-defs --dialect "${DIALECTNAME}" "${SHAREDME_XDL_TABLEGEN_DEFINES}" + EXTRA_INCLUDES + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${LLPC_SOURCE_DIR}/imported/llvm-dialects/include + ) + add_public_tablegen_target(${TBLGEN_TARGET}) + + add_dependencies(sharedme_xdl ${TBLGEN_TARGET}) + target_sources(sharedme_xdl PRIVATE ${FILE}) +endmacro() + +sharedme_xdl_tablegen(lgc.xdl include/lgc/LgcXdlDialect.td LgcXdlDialect) + +if(LLPC_SHAREDME_XDL_BUILD_TEST) + add_subdirectory(test) +endif() diff 
--git a/sharedme/xdl/include/lgc/CooperativeMatrix.td b/sharedme/xdl/include/lgc/CooperativeMatrix.td new file mode 100644 index 0000000000..c8f9d0fbd7 --- /dev/null +++ b/sharedme/xdl/include/lgc/CooperativeMatrix.td @@ -0,0 +1,294 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +class DivergentLgcCoopMatOp traits_ = []> + : DivergentLgcXdlOp; + +class LgcCoopMatOp traits_ = []> + : LgcXdlOp; + +def CooperativeMatrixLengthOp : LgcCoopMatOp<"length", [Memory<[]>, WillReturn]> { + let arguments = (ins CooperativeMatrixLayout:$layout, AttrI32:$k_size); + let results = (outs I32:$result); + + let summary = "get the length for the cooperative matrix"; + let description = [{ + Get the "length" of a matrix of the given layout, i.e. the number of matrix components stored per lane. + + 'layout' is layout of cooperative matrix. + 'k_size' is the matrix K size. + }]; +} + +def CooperativeMatrixLoadOp : DivergentLgcCoopMatOp<"load", [Memory<[(read)]>, Convergent, WillReturn]> { + let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, CooperativeMatrixElementType:$elem_type, + CooperativeMatrixLayout:$layout, AttrI32:$memory_access, AttrI32:$alignment, AttrI32:$k_size); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "load the cooperative matrix elements per lane"; + let description = [{ + Load the elements of cooperative matrix per lane through a pointer. + + Return or vector containing all the elements of the cooperative matrix per lane. + + 'pointer' is the pointer address of the first element of the cooperative matrix stored in memory. + 'stride' is the stride in bytes in memory between the first elements in the source data. + 'col_major' is the order of the data loaded from memory, col-major or row-major. + 'elem_type' is the element type of the cooperative matrix. + 'layout' is the layout of the input cooperative matrix. + + 'memory_access' is a set of flags describing the memory. + - Bit 0 is set if the memory is volatile + - Bit 1 is set if the memory is coherent + - Bit 2 is set if the memory is temporal. 
+ + 'alignment' is the alignment of this load operation. + 'k_size' is the matrix K size. + }]; +} + +def CooperativeMatrixStoreOp : DivergentLgcCoopMatOp<"store", [Memory<[(write)]>, Convergent]> { + let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, CooperativeMatrixElementType:$elem_type, + CooperativeMatrixLayout:$layout, AttrI32:$memory_access, AttrI32:$alignment, + value:$store_value, AttrI32:$k_size); + let results = (outs); + + let summary = "Store cooperative matrix elements per lane to the memory"; + let description = [{ + Store cooperative matrix elements per lane to the memory through the pointer. The elements should be converted to + or type. + + 'pointer' is the pointer address of the data array in memory. + 'stride' is the stride in bytes in memory between the first elements in the source data. + 'col_major' is the order of the data stored into memory, col-major or row-major. + 'elem_type' is the element type of the cooperative matrix. + 'layout' is the layout of the input cooperative matrix. + + 'memory_access' is a set of flags describing the memory. + - Bit 0 is set if the memory is volatile + - Bit 1 is set if the memory is coherent + - Bit 2 is set if the memory is temporal. + + 'alignment' is the alignment of this store operation. + 'store_value' is the elements of the cooperative matrix per lane typed in or to be stored in memory. + 'k_size' is the matrix K size. + }]; +} + +def CooperativeMatrixFillOp : LgcCoopMatOp<"fill", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$scalar, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout, AttrI32:$k_size); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Return a matrix filled with a scalar value"; + let description = [{ + Return a matrix whose elements are all equal to the given `scalar`. + + 'scalar' is the value to fill the cooperative matrix.
+ 'elem_type' is the element type for the cooperative matrix. + 'layout' is the layout of the input cooperative matrix. + 'k_size' is the matrix K size. + }]; +} + +def CooperativeMatrixExtractOp : DivergentLgcCoopMatOp<"extract", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$matrix, value:$index, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "return the element extracted from the cooperative matrix by index"; + let description = [{ + Returns the value at the given `index` in the input matrix. + + 'matrix' is the matrix from which to extract a component. + 'index' is the index of the component to extract. + 'elem_type' is the element type for the cooperative matrix. + 'layout' is the layout of the input cooperative matrix. + }]; +} + +def CooperativeMatrixInsertOp : DivergentLgcCoopMatOp<"insert", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$matrix, value:$insert_value, value:$index, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Insert the element into the cooperative matrix"; + let description = [{ + Insert the given `insert_value` at the given `index` into the input matrix and return the matrix. + + 'matrix' is the matrix into which to insert a component. + 'insert_value' is the value to be inserted. + 'index' is the index at which the value is inserted. + 'elem_type' is the element type for the cooperative matrix. + 'layout' is the layout of the input cooperative matrix.
+ }]; +} + +def CooperativeMatrixConvertOp : DivergentLgcCoopMatOp<"convert", [Memory<[(read)]>, Convergent, WillReturn]> { + let arguments = (ins AttrI32:$cast_op, value:$source, CooperativeMatrixElementType:$src_elem_type, CooperativeMatrixElementType:$dst_elem_type, + CooperativeMatrixLayout:$src_layout, CooperativeMatrixLayout:$dst_layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Reshape the layout for cooperative matrix or cooperative matrix element-wise-conversion operation"; + let description = [{ + This operation will either convert the input matrix into a different layout or convert its elements into a different type, and return the result. + + 'cast_op' is the conversion operation. 0 means reshape on cooperative matrix layout, other values are for element-wise-conversion. + 'source' is the source cooperative matrix. + 'src_elem_type' is the source cooperative matrix's element type. + 'dst_elem_type' is the destination cooperative matrix's element type. + 'src_layout' is the layout for source cooperative matrix. + 'dst_layout' is the layout for target cooperative matrix. + }]; +} + +def CooperativeMatrixTransposeOp : DivergentLgcCoopMatOp<"transpose", [Convergent, WillReturn]> { + let arguments = (ins value:$matrix, CooperativeMatrixElementType:$elem_type, CooperativeMatrixLayout:$layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Transpose the cooperative matrix in register level and change its layout"; + let description = [{ + This operation will transpose the input matrix and return the transposed matrix. + + 'matrix' is the original cooperative matrix for transposition. + 'elem_type' is the element type for the cooperative matrix. + 'layout' is the layout of the input cooperative matrix.
+ }]; +} + +def CooperativeMatrixBinaryOp : DivergentLgcCoopMatOp<"binary", [Convergent, WillReturn]> { + let arguments = (ins CooperativeMatrixArithOp:$arith_op, value:$lhs, value:$rhs, CooperativeMatrixElementType:$elem_type, + CooperativeMatrixLayout:$layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Cooperativematrix binary operation"; + let description = [{ + Perform a binary operation on two matrices and return the resulting matrix. + The two input matrices need to have the same layout and element type. + + 'arith_op' is the arithmetic operation. + 'lhs' is the first cooperative matrix operand. + 'rhs' is the second cooperative matrix operand. + 'elem_type' is the element type of the cooperative matrices. + 'layout' is the layout of the input cooperative matrix. + }]; +} + +def CooperativeMatrixTimesScalarOp : DivergentLgcCoopMatOp<"times.scalar", [Convergent, WillReturn]> { + let arguments = (ins value:$matrix, value:$scalar, CooperativeMatrixElementType:$elem_type, + CooperativeMatrixLayout:$layout); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Compute Matrix x Scalar and return the resulting cooperative matrix"; + let description = [{ + Multiply all matrix elements in the input matrix by the given `scalar`. + + 'matrix' is the matrix operand for the operation. + 'scalar' is the scalar operand for the operation. + 'elem_type' is the element type for the cooperative matrix operand. + 'layout' is the layout for the cooperative matrix.
+ }]; +} + +def CooperativeMatrixMulAddOp : DivergentLgcCoopMatOp<"muladd", [Convergent, WillReturn]> { + let arguments = (ins value:$matrix_a, value:$matrix_b, value:$matrix_c, AttrI1:$is_signed_a, AttrI1:$is_signed_b, + AttrI1:$is_sat_or_opsel, AttrI1:$is_tied, CooperativeMatrixElementType:$matrix_a_elem_type, + CooperativeMatrixElementType:$matrix_b_elem_type, CooperativeMatrixElementType:$matrix_c_elem_type, + CooperativeMatrixElementType:$matrix_d_elem_type, AttrI32:$k_multiplier); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Calculate `matrix_a` * `matrix_b` + `matrix_c`"; + let description = [{ + Multiply `matrix_a` by `matrix_b` and add `matrix_c`. The resulting matrix has the same type as `matrix_c`. + + 'matrix_a' is the factor cooperative matrix whose use is MatrixAKHR. + 'matrix_b' is the factor cooperative matrix whose use is MatrixBKHR. + 'matrix_c' is the accumulator cooperative matrix whose use is MatrixAccumulatorKHR. + 'is_signed_a' is the signedness of matrix_a's element type. + 'is_signed_b' is the signedness of matrix_b's element type. + + 'is_sat_or_opsel' is the saturatingAccumulation flag for the calculation. + In the case of 16-bit floating point matrices, this bit acts as an opsel bit: + if it is set to false, we store the result in the lower half of + the registers. If it is true, we store it in the upper half. + + 'is_tied' is the flag indicating that the output matrix has to be the same + as the input accumulator (i.e., D has to be C) + + '$matrix_a_elem_type' is the component type of the matrix A + '$matrix_b_elem_type' is the component type of the matrix B + '$matrix_c_elem_type' is the component type of the matrix C + '$matrix_d_elem_type' is the component type of the matrix D + '$k_multiplier' is the multiplier for the matrix K size.
+ }]; +} + +def CooperativeMatrixPackOp : DivergentLgcCoopMatOp<"pack", [Memory<[(read)]>, WillReturn]> { + let arguments = (ins value:$matrix_c_lo, value:$matrix_c_hi); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Cooperativematrix pack operation"; + let description = [{ + This is to pack two accumulator cooperativematrices and store in the same registers. + + 'matrix_c_lo' is the lower accumulator cooperative matrix to be packed. + 'matrix_c_hi' is the upper accumulator cooperative matrix to be packed. + }]; +} + +def CooperativeMatrixUnPackOp : DivergentLgcCoopMatOp<"unpack", [Memory<[(read)]>, WillReturn]> { + let arguments = (ins value:$packed_matrix, AttrI1:$get_upper_half); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "Restores an unpacked matrix from a packed accumulator"; + let description = [{ + Returns the unpacked matrix stored in either the upper or lower half of a packed accumulator. + + 'packed_matrix' is the packed Accumulator cooperative matrix. + + 'get_upper_half' is the flag of getting the upper half or lower half of the register. + - if it's true, it will unpack cooperative matrix stored in the upper half register. + - if it's false, it will unpack cooperative matrix stored in the lower half register. + }]; +} diff --git a/sharedme/xdl/include/lgc/LgcXdlDialect.h b/sharedme/xdl/include/lgc/LgcXdlDialect.h new file mode 100644 index 0000000000..3c1e651b59 --- /dev/null +++ b/sharedme/xdl/include/lgc/LgcXdlDialect.h @@ -0,0 +1,37 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LgcXdlDialect.h + * @brief Declarations for the LGC XDL IR dialect + *********************************************************************************************************************** + */ +#pragma once + +#include "lgc/LgcXdlTypes.h" + +#define GET_INCLUDES +#define GET_DIALECT_DECLS +#include "lgc/LgcXdlDialect.h.inc" diff --git a/sharedme/xdl/include/lgc/LgcXdlDialect.td b/sharedme/xdl/include/lgc/LgcXdlDialect.td new file mode 100644 index 0000000000..4b05dfee89 --- /dev/null +++ b/sharedme/xdl/include/lgc/LgcXdlDialect.td @@ -0,0 +1,62 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +include "llvm-dialects/Dialect/Dialect.td" + +def LgcXdlDialect : Dialect { + let name = "lgc.xdl"; + let cppNamespace = "lgc::xdl"; +} + +def ConstantPointer : TgConstant<(PointerType 4)>, Type; +def PrivatePointer : TgConstant<(PointerType 5)>, Type; +def BufferPointer : TgConstant<(PointerType 7)>, Type; +def BufferStridedPointer : TgConstant<(PointerType 9)>, Type; +def TaskPayloadPointer : TgConstant<(PointerType 7)>, Type; + +#ifdef LLVM_HAVE_NODIVERGENCESOURCE_ATTR +def NoDivergenceSource : LlvmEnumAttributeTrait<"NoDivergenceSource">; +#endif + +defm CooperativeMatrixMemoryAccess : AttrEnum<"CooperativeMatrixMemoryAccess">; +defm CooperativeMatrixElementType : AttrEnum<"CooperativeMatrixElementType">; +defm CooperativeMatrixLayout : AttrEnum<"CooperativeMatrixLayout">; +defm CooperativeMatrixArithOp : AttrEnum<"CooperativeMatrixArithOp">; + +class DivergentLgcXdlOp traits_ = []> + : Op; + +class LgcXdlOp traits_ = []> + : DivergentLgcXdlOp; + +class LgcXdlIntrinOp traits_ = []> + : LgcXdlOp; + +include "lgc/CooperativeMatrix.td" +include "lgc/RowAccumulator.td" diff --git a/sharedme/xdl/include/lgc/LgcXdlTypes.h b/sharedme/xdl/include/lgc/LgcXdlTypes.h new file mode 100644 index 0000000000..d1b82f4af9 --- /dev/null +++ b/sharedme/xdl/include/lgc/LgcXdlTypes.h @@ -0,0 +1,85 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LgcXdlTypes.h + * @brief Declarations for the LGC XDL Types and Enumerations + *********************************************************************************************************************** + */ +#pragma once + +namespace lgc::xdl { + +enum class CooperativeMatrixMemoryAccess : unsigned { + MemoryAccessMaskNone = 0x00, // No mask + MemoryAccessVolatileMask = 0x01, // Access memory in volatile + MemoryAccessCoherentMask = 0x02, // Access memory in coherent + MemoryAccessTemporalMask = 0x04, // Access memory in temporal +}; + +enum class CooperativeMatrixElementType : unsigned { + Unknown = 0, // Unknown + Float16, // 16-bit floating-point + Float32, // 32-bit floating-point + Int8, // 8-bit integer + Int16, // 16-bit integer + Int32, // 32-bit integer + Float16Packed, // packed 16-bit floating-point + BFloat16, // 16-bit brain floating-point + Float8, // 8-bit floating-point + BFloat8, // 8-bit brain floating-point + Int4, // 4-bit integer +}; + +// Layout is a virtual concept, e.g. 16bit and 32bit for matrixC will share the same layout initially. +// It will be passed as the argument of getTypeProperties to calculate the more detailed layout information. +enum class CooperativeMatrixLayout : unsigned { + FactorMatrixLayout = 0, // A/B layout on gfx10/gfx11 + AccumulatorMatrixLayout, // C/D layout on gfx11 + Gfx10AccumulatorMatrixLayout, // 32bit@C/D layout on gfx10 + Gfx10Accumulator16bitMatrixLayout, // 16bit@C/D layout on gfx10 + InvalidLayout +}; + +// The cooperative matrix arithmetic operations the builder can consume. +// NOTE: This is a scoped enum (enum class), so its values must be cast explicitly to be used as integers. 
+enum class CooperativeMatrixArithOp : unsigned { + IAdd = 0, + FAdd, + ISub, + FSub, + IMul, + FMul, + UDiv, + SDiv, + FDiv, + UMod, + SRem, + SMod, + FRem, + FMod +}; + +} // namespace lgc::xdl diff --git a/sharedme/xdl/include/lgc/RowAccumulator.td b/sharedme/xdl/include/lgc/RowAccumulator.td new file mode 100644 index 0000000000..ecd22a19ea --- /dev/null +++ b/sharedme/xdl/include/lgc/RowAccumulator.td @@ -0,0 +1,171 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +class DivergentLgcCoopRowAccOp traits_ = []> + : DivergentLgcXdlOp; + +class LgcCoopRowAccOp traits_ = []> + : LgcXdlOp; + +def CooperativeRowAccLoadOp : DivergentLgcCoopRowAccOp<"load", [Memory<[(read)]>, WillReturn]> { + let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, CooperativeMatrixMemoryAccess:$memory_access); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "load cooperative rowacc from memory"; + let description = [{ + Load contiguous elements from the specified location of the memory. + + Return acc row data in finalized mode. + + 'pointer' is the pointer address to the data. + 'stride' is the stride in bytes in memory between the first elements in the source data. + 'elem_type' is the element type for the row acc. + + 'memory_access' is a set of flags describing the memory. + - Bit 0 is set if the memory is volatile + - Bit 1 is set if the memory is coherent + - Bit 2 is set if the memory is temporal. + }]; +} + +def CooperativeRowAccStoreOp : DivergentLgcCoopRowAccOp<"store", [Memory<[(write)]>]> { + let arguments = (ins value:$pointer, I32:$stride, CooperativeMatrixElementType:$elem_type, value:$data, CooperativeMatrixMemoryAccess:$memory_access); + let results = (outs); + + let summary = "store cooperative rowacc to memory"; + let description = [{ + Store contiguous elements to the specified location of the memory. + + 'pointer' is the pointer address to the data. + 'stride' is the stride in bytes in memory between the first elements in the source data. + 'elem_type' is the element type for the row acc. + 'data' is the data of the row acc; it must be in finalized mode. + + 'memory_access' is a set of flags describing the memory. 
+ - Bit 0 is set if the memory is volatile + - Bit 1 is set if the memory is coherent + - Bit 2 is set if the memory is temporal. + }]; +} + +def CooperativeRowAccAccumulateModeOp : DivergentLgcCoopRowAccOp<"accumulate.mode", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); + let results = (outs (eq $row_acc):$result); + + let summary = "change cooperative row acc data mode from finalize mode to accumulate mode"; + let description = [{ + convert the row acc data from finalize mode to accumulate mode. + + Return acc row data in accumulate mode. + + 'row_acc' is the input row acc data, must be in finalize mode. + 'elem_type' is the element type for the row acc. + }]; +} + +def CooperativeRowAccFinalizeModeOp : DivergentLgcCoopRowAccOp<"finalize.mode", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$elem_type); + let results = (outs (eq $row_acc):$result); + + let summary = "change cooperative rowacc data mode from accumulate state to finalize state"; + let description = [{ + convert the row accumulator data from accumulate mode to finalize mode. + + Return row accumulator data in finalized mode. + + 'row_acc' is the input row acc data, must be in accumulate mode. + 'elem_type' is the element type for the row acc. + }]; +} + +def CooperativeRowAccSplatOp : DivergentLgcCoopRowAccOp<"splat", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$scalar, CooperativeMatrixElementType:$elem_type); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "fill cooperative rowacc with a scalar value"; + let description = [{ + Return filled cooperative acc row in finalize mode. + + 'scalar' is the scalar value used to fill the cooperative row acc. + 'elem_type' is the element type for the cooperative row acc. 
+ }]; +} + +def CooperativeRowAccSumAccumulateOp : DivergentLgcCoopRowAccOp<"sum.accumulate", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$matrix, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, AttrI1:$is_signed); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "sum and accumulate columns of cooperative matrix value to cooperative row accumulator"; + let description = [{ + Return accumulated acc row data in accumulate mode. + + 'matrix' is the input cooperative matrix. Must be A/B matrix. + 'matrix_elem_type' is the element type for the cooperative matrix. + 'matrix_layout' is the layout for the cooperative matrix. + 'row_acc' is the input cooperative row acc, must be in accumulate mode. + 'row_acc_elem_type' is the element type for input cooperative row acc. + 'is_signed' indicates whether the row accumulator element type is considered signed or not. + }]; +} + +def CooperativeRowAccScalarOp : DivergentLgcCoopRowAccOp<"scalar", [Memory<[]>, WillReturn]> { + let arguments = (ins CooperativeMatrixArithOp:$binop, value:$row_acc, CooperativeMatrixElementType:$elem_type, value:$scalar, AttrI1:$accumulate_mode); + let results = (outs (eq $row_acc):$result); + + let summary = "cooperative row accumulator scalar operation in accumulate or finalize mode"; + let description = [{ + Return the cooperative row accumulator data with same mode (accumulate or finalized) as input cooperative accumulator data. + + 'binop' is the cooperative matrix arithmetic operation. + 'row_acc' is the input cooperative row accumulator. + 'elem_type' is the element type for the cooperative row accumulator data. + 'scalar' is the scalar value for operation. + 'accumulate_mode' indicates whether the input and returned accumulator data are in accumulate or finalize mode. 
+ }]; +} + +def CooperativeRowAccExpandOp : DivergentLgcCoopRowAccOp<"expand", [Memory<[]>, WillReturn]> { + let arguments = (ins value:$row_acc, CooperativeMatrixElementType:$row_acc_elem_type, CooperativeMatrixElementType:$matrix_elem_type, CooperativeMatrixLayout:$matrix_layout, AttrI1:$col_major); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "expand cooperative row accumulator data to cooperative matrix, the input row accumulator data must be in finalize mode."; + let description = [{ + Return the cooperative matrix. + + 'row_acc' is the input cooperative row accumulator. + 'row_acc_elem_type' is the element type for the input cooperative row accumulator data. + 'matrix_elem_type' is the element type for the output cooperative matrix. + 'matrix_layout' is the layout for the output cooperative matrix. + 'col_major' indicates how to expand the cooperative row accumulator data, by row or by column. + }]; +} diff --git a/sharedme/xdl/include/lgc/Sparse.td b/sharedme/xdl/include/lgc/Sparse.td new file mode 100644 index 0000000000..02c474b2a0 --- /dev/null +++ b/sharedme/xdl/include/lgc/Sparse.td @@ -0,0 +1,78 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +def SparsityIndexLoadOp : DivergentLgcXdlOp<"sparsityindex.load", [Memory<[(read)]>, Convergent, WillReturn]> { + let arguments = (ins value:$pointer, value:$stride, AttrI1:$col_major, AttrI32:$memory_access); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "load the sparsity index for the sparse cooperative matrix"; + let description = [{ + Load the sparsity index for sparse cooperative matrix A which will be used in sparseA * DenseB + DenseC + Return value which size is [unused_16bit | index_16bit] for wave32 or [unused_24bit | index_8bit] for wave64. + + 'pointer' is the pointer to the index data stored in memory. + 'stride' is to qualify how the index data is laid out in memory. It must be of scalar integer type. + 'col_major' is a constant 1-bit (i1) attribute whose value corresponds to a Sparsity Index Memory Layout. + + 'memory_access' is a set of flags describing the memory. + - Bit 0 is set if the memory is volatile + - Bit 1 is set if the memory is coherent + - Bit 2 is set if the memory is temporal. 
+ }]; +} + +def SparseCooperativeMatrixMulAddOp : DivergentLgcXdlOp<"sparseCooperativeMatrix.muladd", [Convergent, WillReturn]> { + let arguments = (ins value:$matrix_a, value:$sparse_index, value:$matrix_b, value:$matrix_c, AttrI1:$is_signed_a, AttrI1:$is_signed_b, + AttrI1:$is_sat, CooperativeMatrixElementType:$matrix_a_elem_type, + CooperativeMatrixElementType:$matrix_b_elem_type, CooperativeMatrixElementType:$matrix_c_elem_type, + CooperativeMatrixElementType:$matrix_d_elem_type, AttrI32:$k_multiplier); + let results = (outs value:$result); + + let defaultBuilderHasExplicitResultType = true; + + let summary = "The muladd operation supported as sparseA * denseB + denseC"; + let description = [{ + Sparse linear-algebraic matrix multiply of A and B with structural sparsity + information taken from Index, followed by component-wise addition of C. + The semantics of the multiplication are defined by the sparsity format of Index. + + Only support sparseA*DenseB+DenseC now. + + 'matrix_a' is the factor cooperative matrix whose use is MatrixAKHR. + 'sparse_index' is the sparsity index. + 'matrix_b' is the factor cooperative matrix whose use is MatrixBKHR. + 'matrix_c' is the accumulator cooperative matrix whose use is MatrixCKHR. + 'is_signed_a' is the signedness for matrixA's element type. + 'is_signed_b' is the signedness for matrixB's element type. + 'is_sat' is the saturatingAccumulation for calculation. + 'matrix_a_elem_type' is the component type of the A matrix. + 'matrix_b_elem_type' is the component type of the B matrix. + 'matrix_c_elem_type' is the component type of the C matrix. + 'matrix_d_elem_type' is the component type of the D matrix. + 'k_multiplier' is the multiplier for the matrix K size. 
+ }]; +} diff --git a/sharedme/xdl/include/xdl/util/ElementType.h b/sharedme/xdl/include/xdl/util/ElementType.h new file mode 100644 index 0000000000..61e393d099 --- /dev/null +++ b/sharedme/xdl/include/xdl/util/ElementType.h @@ -0,0 +1,66 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ElementType.h + * @brief contains the declaration of utility functions of Xdl Element types + *********************************************************************************************************************** + */ +#pragma once + +#include "lgc/LgcXdlTypes.h" + +namespace llvm { +class Type; +class Value; +} // namespace llvm + +namespace llvm_dialects { +class Builder; +} // namespace llvm_dialects + +namespace lgc::xdl { +// Get the LGC type of a cooperative matrix with the given element type, layout and K size. +llvm::Type *getCooperativeMatrixTy(llvm_dialects::Builder &builder, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, unsigned kSize = 16); + +// Whether the type of a cooperative matrix is integer. +bool isUnderlyingIntegerCooperativeMatrix(CooperativeMatrixElementType elemType); + +// Interpret the cooperative matrix's element as uint32. +llvm::Value *interpretCoopMatElementAsIntegerTy(llvm_dialects::Builder &builder, llvm::Value *value, + CooperativeMatrixElementType elemType); + +// Interpret the value as cooperative matrix's element type. +llvm::Value *interpretValueAsCoopMatElementTy(llvm_dialects::Builder &builder, llvm::Value *value, + CooperativeMatrixElementType elemType); + +// Whether the type of a cooperative matrix is specified bit width. +bool isTypeNCooperativeMatrix(CooperativeMatrixElementType elemType, unsigned bitWidth); + +// Convert the element type enum into the corresponding LLVM type. 
+llvm::Type *transCooperativeMatrixElementType(llvm_dialects::Builder &builder, CooperativeMatrixElementType elemType); + +} // namespace lgc::xdl diff --git a/sharedme/xdl/lib/LgcXdlDialect.cpp b/sharedme/xdl/lib/LgcXdlDialect.cpp new file mode 100644 index 0000000000..3c040b007c --- /dev/null +++ b/sharedme/xdl/lib/LgcXdlDialect.cpp @@ -0,0 +1,36 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file LgcXdlDialect.cpp + * @brief Implementation for the LGC XDL IR dialect + *********************************************************************************************************************** + */ + +#include "lgc/LgcXdlDialect.h" + +#define GET_INCLUDES +#define GET_DIALECT_DEFS +#include "lib/LgcXdlDialect.cpp.inc" diff --git a/sharedme/xdl/test/CMakeLists.txt b/sharedme/xdl/test/CMakeLists.txt new file mode 100644 index 0000000000..fa5d55ccaf --- /dev/null +++ b/sharedme/xdl/test/CMakeLists.txt @@ -0,0 +1,46 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +set(SHAREDME_XDL_TEST_DEPS FileCheck count not opt) + +# required by lit.site.cfg.py.in +set(SHAREDME_XDL_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +# required by configure_lit_site_cfg +set(LLVM_LIT_OUTPUT_DIR ${LLVM_TOOLS_BINARY_DIR}) +get_target_property(LIT_DEFINITIONS vkgc_headers INTERFACE_COMPILE_DEFINITIONS) +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) + +add_lit_testsuite(check-sharedme-xdl "Running the shared component XDL lit testcases" + ${CMAKE_CURRENT_BINARY_DIR} + ${exclude_from_check_all} + DEPENDS ${SHAREDME_XDL_TEST_DEPS} +) +set_target_properties(check-sharedme-xdl PROPERTIES FOLDER "Tests") diff --git a/sharedme/xdl/test/lit.cfg.py b/sharedme/xdl/test/lit.cfg.py new file mode 100644 index 0000000000..c2f3f167c4 --- /dev/null +++ b/sharedme/xdl/test/lit.cfg.py @@ -0,0 +1,73 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +# Configuration file for the 'lit' test runner. + +import os +import sys +import re +import platform +import subprocess + +import lit.util +import lit.formats +from lit.llvm import llvm_config +from lit.llvm.subst import FindTool +from lit.llvm.subst import ToolSubst + +# name: The name of this test suite. +config.name = 'SHAREDME_XDL_TEST' + +# testFormat: The test format to use to interpret tests. +config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) + +# suffixes: A list of file extensions to treat as test files. This is overridden +# by individual lit.local.cfg files in the test subdirectories. +config.suffixes = ['.ll'] + +# excludes: A list of directories to exclude from the testsuite. 
The 'Inputs' +# subdirectories contain auxiliary inputs for various tests in their parent +# directories. +config.excludes = ['CMakeLists.txt'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root path where tests should be run. +config.test_exec_root = os.path.join(config.llvm_obj_root, 'test') + +if config.llvm_assertions in ['ON', '1']: + config.available_features.add('assertions') + +llvm_config.use_default_substitutions() + +config.substitutions.append(('%PATH%', config.environment['PATH'])) +config.substitutions.append(('%gfxip', config.gfxip)) + +tool_dirs = [config.llvm_tools_dir] + +tools = ['FileCheck', 'count', 'not', 'opt'] + +llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/sharedme/xdl/test/lit.site.cfg.py.in b/sharedme/xdl/test/lit.site.cfg.py.in new file mode 100644 index 0000000000..04ae309188 --- /dev/null +++ b/sharedme/xdl/test/lit.site.cfg.py.in @@ -0,0 +1,61 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. 
+ # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. + # + ####################################################################################################################### + +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.llvm_src_root = "@LLVM_BUILD_MAIN_SRC_DIR@" +config.llvm_obj_root = "@LLVM_BINARY_DIR@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.spvgen_dir = "@XGL_SPVGEN_BUILD_PATH@" +config.lit_tools_dir = "" +config.python_executable = "@PYTHON_EXECUTABLE@" +config.test_run_dir = "@CMAKE_CURRENT_BINARY_DIR@" +config.gfxip = "-gfxip=11.0" + +# Propagate CMake options used in lit feature tests. +config.llvm_assertions = "@LLVM_ENABLE_ASSERTIONS@" + +for d in "@LIT_DEFINITIONS@".split(";"): + def_split = d.split("=") + name = def_split[0].lower() + val = def_split[1] if len(def_split) > 1 else "ON" + config.available_features.add(name) + +# Support substitution of the tools_dir with user parameters. This is +# used when we can't determine the tool dir at configuration time. +try: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params +except KeyError: + e = sys.exc_info()[1] + key, = e.args + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. 
+lit_config.load_config(config, "@SHAREDME_XDL_TEST_SOURCE_DIR@/lit.cfg.py") diff --git a/sharedme/xdl/util/ElementType.cpp b/sharedme/xdl/util/ElementType.cpp new file mode 100644 index 0000000000..73a6baa27e --- /dev/null +++ b/sharedme/xdl/util/ElementType.cpp @@ -0,0 +1,239 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file ElementType.cpp + * @brief contains the definition of utility functions of Xdl element types + *********************************************************************************************************************** + */ + +#include "xdl/util/ElementType.h" +#include "lgc/LgcXdlTypes.h" +#include "llvm-dialects/Dialect/Builder.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace lgc::xdl; + +// ===================================================================================================================== +// Get the bit width of the cooperativeMatrix element type +// +// @param elemType : The matrix element type +static unsigned getBitWidthOfCooperativeMatrixElement(CooperativeMatrixElementType elemType) { + switch (elemType) { + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16Packed: + case CooperativeMatrixElementType::BFloat16: + case CooperativeMatrixElementType::Int16: + return 16; + case CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Int32: + return 32; + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Float8: + case CooperativeMatrixElementType::BFloat8: + return 8; + case CooperativeMatrixElementType::Int4: + return 4; + default: + llvm_unreachable("Type is not supported!"); + } +} + +// ===================================================================================================================== +// Get the LGC type of a cooperative matrix with the given element type and layout. 
+// +// @param builder : The IR builder +// @param elemType : the matrix element type +// @param layout : the matrix layout +// @param kSize : the matrix K size +llvm::Type *lgc::xdl::getCooperativeMatrixTy(llvm_dialects::Builder &builder, CooperativeMatrixElementType elemType, + CooperativeMatrixLayout layout, unsigned kSize) { + // Note: the layout currently has no influence on the type. In the long run, we should switch to genuinely opaque + // types at the LGC level, and parameterize the type using both the element type and the layout. + + llvm::Type *wordTy = isUnderlyingIntegerCooperativeMatrix(elemType) ? builder.getInt32Ty() : builder.getFloatTy(); + [[maybe_unused]] unsigned cntDwords = 0; + switch (layout) { + case CooperativeMatrixLayout::Gfx10Accumulator16bitMatrixLayout: + case CooperativeMatrixLayout::Gfx10AccumulatorMatrixLayout: + case CooperativeMatrixLayout::AccumulatorMatrixLayout: + return llvm::FixedVectorType::get(wordTy, 8); + case CooperativeMatrixLayout::FactorMatrixLayout: + if (elemType == CooperativeMatrixElementType::Int4) + return llvm::FixedVectorType::get(wordTy, 2); + if (elemType == CooperativeMatrixElementType::Int8) + return llvm::FixedVectorType::get(wordTy, 4); + return llvm::FixedVectorType::get(wordTy, 8); + default: + llvm_unreachable("Type is not supported!"); + } +} + +// ===================================================================================================================== +// Whether the underlying type of a cooperative matrix is integer. 
+// +// @param elemType : The matrix element type +bool lgc::xdl::isUnderlyingIntegerCooperativeMatrix(CooperativeMatrixElementType elemType) { + switch (elemType) { + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float32: + case CooperativeMatrixElementType::Float16Packed: + return false; + case CooperativeMatrixElementType::BFloat16: + case CooperativeMatrixElementType::Float8: + case CooperativeMatrixElementType::BFloat8: + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Int32: + case CooperativeMatrixElementType::Int4: + return true; + default: + llvm_unreachable("Type is not supported!"); + } +} + +// ===================================================================================================================== +// Interpret the cooperative matrix's element as uint32. +// +// @param builder : The IR builder +// @param value : Input data as element type +// @param elemType : The source element type enum value +// @returns The data of uint32 +llvm::Value *lgc::xdl::interpretCoopMatElementAsIntegerTy(llvm_dialects::Builder &builder, llvm::Value *value, + CooperativeMatrixElementType elemType) { + [[maybe_unused]] llvm::Type *llElemType = transCooperativeMatrixElementType(builder, elemType); + llvm::Type *targetType = builder.getInt32Ty(); + switch (elemType) { + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16Packed: + return builder.CreateZExt(builder.CreateBitCast(value, builder.getInt16Ty()), targetType); + case CooperativeMatrixElementType::BFloat16: + assert(llElemType->isIntegerTy(16)); + return builder.CreateZExt(value, targetType); + case CooperativeMatrixElementType::Float32: + return builder.CreateBitCast(value, targetType); + case CooperativeMatrixElementType::BFloat8: + case CooperativeMatrixElementType::Float8: + assert(llElemType->isIntegerTy(8)); + return builder.CreateZExt(value, targetType); + 
case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int4: + return builder.CreateZExt(value, targetType); + case CooperativeMatrixElementType::Int32: + return value; + default: + llvm_unreachable("Unknown cooperative matrix element type."); + } + return nullptr; +} + +// ===================================================================================================================== +// Interpret the value as cooperative matrix's element type. +// +// @param builder : The IR builder +// @param value : Input data as uint32 +// @param elemType : The target element type enum value +// @returns The data of correct element type +llvm::Value *lgc::xdl::interpretValueAsCoopMatElementTy(llvm_dialects::Builder &builder, llvm::Value *value, + CooperativeMatrixElementType elemType) { + assert(value->getType()->isIntegerTy(32)); + llvm::Type *type = transCooperativeMatrixElementType(builder, elemType); + switch (elemType) { + case CooperativeMatrixElementType::BFloat8: + case CooperativeMatrixElementType::Float8: + assert(type->isIntegerTy(8)); + return builder.CreateTrunc(value, type); + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16Packed: + return builder.CreateBitCast(builder.CreateTrunc(value, builder.getInt16Ty()), type); + case CooperativeMatrixElementType::BFloat16: + assert(type->isIntegerTy(16)); + return builder.CreateTrunc(value, type); + case CooperativeMatrixElementType::Float32: + return builder.CreateBitCast(value, builder.getFloatTy()); + case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Int4: + return builder.CreateTrunc(value, type); + case CooperativeMatrixElementType::Int32: + return value; + default: + llvm_unreachable("Unknown cooperative matrix element type."); + } + return nullptr; +} + +// 
===================================================================================================================== +// Whether the type of a cooperative matrix is specified bit width. +// +// @param elemType : the matrix element type +// @param bitWidth : the specified bit width +bool lgc::xdl::isTypeNCooperativeMatrix(CooperativeMatrixElementType elemType, unsigned bitWidth) { + unsigned width = getBitWidthOfCooperativeMatrixElement(elemType); + return width == bitWidth; +} + +// ===================================================================================================================== +// Convert the element type enum into the corresponding LLVM type. +// +// @param builder : The IR builder +// @param elemType : The element type enum value +// @returns the corresponding LLVM type +llvm::Type *lgc::xdl::transCooperativeMatrixElementType(llvm_dialects::Builder &builder, + CooperativeMatrixElementType elemType) { + llvm::Type *type = nullptr; + switch (elemType) { + case CooperativeMatrixElementType::Float16: + case CooperativeMatrixElementType::Float16Packed: + type = builder.getHalfTy(); + break; + case CooperativeMatrixElementType::Float32: + type = builder.getFloatTy(); + break; + case CooperativeMatrixElementType::Int16: + case CooperativeMatrixElementType::BFloat16: + type = builder.getInt16Ty(); + break; + case CooperativeMatrixElementType::Int32: + type = builder.getInt32Ty(); + break; + case CooperativeMatrixElementType::Int8: + case CooperativeMatrixElementType::Float8: + case CooperativeMatrixElementType::BFloat8: + type = builder.getInt8Ty(); + break; + case CooperativeMatrixElementType::Int4: + type = builder.getIntNTy(4); + break; + default: + llvm_unreachable("The element type is not supported."); + } + assert(type->isIntegerTy() == isUnderlyingIntegerCooperativeMatrix(elemType)); + return type; +} diff --git a/test/amber/subgroupshuffle-index-constant.amber b/test/amber/subgroupshuffle-index-constant.amber new file mode 100644 index 
0000000000..bf48036731 --- /dev/null +++ b/test/amber/subgroupshuffle-index-constant.amber @@ -0,0 +1,72 @@ +#!amber -v 1.3 +## Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ## + + +# RUN: run_amber_test.py --icd %icd %s +# +# Test vec2 shuffles. + +DEVICE_FEATURE SubgroupSupportedOperations.shuffle + +SHADER compute compute_shader GLSL TARGET_ENV spv1.3 +#version 450 +#extension GL_KHR_shader_subgroup_shuffle : enable + +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0, std430) buffer InBuffer { + ivec2 a[64]; +} inbuf; + +layout(set = 0, binding = 1, std430) buffer OutBuffer { + ivec2 b[64]; +} outbuf; + +void main() { + uint id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + ivec2 x = inbuf.a[id]; + x = subgroupShuffle(x, 5); + outbuf.b[id] = x; +} +END + +# Buffer pattern guarantees same result for both wave32 and wave64 +BUFFER inbuf DATA_TYPE int32 STD430 DATA + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 48 49 + 50 51 52 53 54 55 56 57 58 59 + 60 61 62 63 + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 48 49 + 50 51 52 53 54 55 56 57 58 59 + 60 61 62 63 +END + +BUFFER outbuf DATA_TYPE int32 STD430 SIZE 128 FILL 9999 + +BUFFER expected DATA_TYPE int32 STD430 DATA + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 +END + +PIPELINE compute pipeline + ATTACH compute_shader + BIND BUFFER inbuf AS storage DESCRIPTOR_SET 0 BINDING 0 + BIND BUFFER outbuf AS storage DESCRIPTOR_SET 0 
BINDING 1 +END + +RUN pipeline 1 1 1 + +EXPECT outbuf EQ_BUFFER expected diff --git a/test/amber/subgroupshuffle-index-uniform.amber b/test/amber/subgroupshuffle-index-uniform.amber new file mode 100644 index 0000000000..bd77c736c9 --- /dev/null +++ b/test/amber/subgroupshuffle-index-uniform.amber @@ -0,0 +1,81 @@ +#!amber -v 1.3 +## Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. ## + + +# RUN: run_amber_test.py --icd %icd %s +# +# Test vec2 shuffles. + +DEVICE_FEATURE SubgroupSupportedOperations.shuffle + +SHADER compute compute_shader GLSL TARGET_ENV spv1.3 +#version 450 +#extension GL_KHR_shader_subgroup_shuffle : enable + +layout(local_size_x = 64) in; + +layout(push_constant) uniform constants { + uint lane; +}; + +layout(set = 0, binding = 0, std430) buffer InBuffer { + ivec2 a[64]; +} inbuf; + +layout(set = 0, binding = 1, std430) buffer OutBuffer { + ivec2 b[64]; +} outbuf; + +void main() { + uint id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + ivec2 x = inbuf.a[id]; + x = subgroupShuffle(x, lane); + outbuf.b[id] = x; +} +END + +# Buffer pattern guarantees same result for both wave32 and wave64 +BUFFER inbuf DATA_TYPE int32 STD430 DATA + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 48 49 + 50 51 52 53 54 55 56 57 58 59 + 60 61 62 63 + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 33 34 35 36 37 38 39 + 40 41 42 43 44 45 46 47 48 49 + 50 51 52 53 54 55 56 57 58 59 + 60 61 62 63 +END + +BUFFER outbuf DATA_TYPE int32 STD430 SIZE 128 FILL 9999 + +BUFFER constants DATA_TYPE int32 STD430 DATA + 5 +END + +BUFFER expected DATA_TYPE int32 STD430 DATA + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 
10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 + 10 11 10 11 10 11 10 11 10 11 10 11 10 11 10 11 +END + +PIPELINE compute pipeline + ATTACH compute_shader + BIND BUFFER inbuf AS storage DESCRIPTOR_SET 0 BINDING 0 + BIND BUFFER outbuf AS storage DESCRIPTOR_SET 0 BINDING 1 + BIND BUFFER constants AS push_constant +END + +RUN pipeline 1 1 1 + +EXPECT outbuf EQ_BUFFER expected diff --git a/test/lit.cfg.py b/test/lit.cfg.py index 08475ffb08..e756269e58 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + # -*- Python -*- # Configuration file for the 'lit' test runner. diff --git a/test/lit.site.cfg.py.in b/test/lit.site.cfg.py.in index ad638abca7..7174c2eede 100644 --- a/test/lit.site.cfg.py.in +++ b/test/lit.site.cfg.py.in @@ -1,3 +1,28 @@ +## + ####################################################################################################################### + # + # Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. + # + # Permission is hereby granted, free of charge, to any person obtaining a copy + # of this software and associated documentation files (the "Software"), to + # deal in the Software without restriction, including without limitation the + # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + # sell copies of the Software, and to permit persons to whom the Software is + # furnished to do so, subject to the following conditions: + # + # The above copyright notice and this permission notice shall be included in all + # copies or substantial portions of the Software. + # + # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + # IN THE SOFTWARE. 
+ # + ####################################################################################################################### + @LIT_SITE_CFG_IN_HEADER@ import sys diff --git a/tool/UpdateTestChecks/common.py b/tool/UpdateTestChecks/common.py index 4c8aa2b1f4..f6389fd361 100644 --- a/tool/UpdateTestChecks/common.py +++ b/tool/UpdateTestChecks/common.py @@ -9,1285 +9,2612 @@ from __future__ import print_function import argparse +import bisect +import collections import copy import glob -import itertools import os import re import subprocess import sys import shlex -from typing import List +from typing import List, Mapping, Set ##### Common utilities for update_*test_checks.py _verbose = False -_prefix_filecheck_ir_name = '' +_prefix_filecheck_ir_name = "" + +""" +Version changelog: + +1: Initial version, used by tests that don't specify --version explicitly. +2: --function-signature is now enabled by default and also checks return + type/attributes. +3: Opening parenthesis of function args is kept on the first LABEL line + in case arguments are split to a separate SAME line. +4: --check-globals now has a third option ('smart'). The others are now called + 'none' and 'all'. 'smart' is the default. +5: Basic block labels are matched by FileCheck expressions +""" +DEFAULT_VERSION = 5 + +SUPPORTED_ANALYSES = { + "Branch Probability Analysis", + "Cost Model Analysis", + "Loop Access Analysis", + "Scalar Evolution Analysis", +} class Regex(object): - """Wrap a compiled regular expression object to allow deep copy of a regexp. - This is required for the deep copy done in do_scrub. + """Wrap a compiled regular expression object to allow deep copy of a regexp. + This is required for the deep copy done in do_scrub. 
- """ - def __init__(self, regex): - self.regex = regex + """ - def __deepcopy__(self, memo): - result = copy.copy(self) - result.regex = self.regex - return result + def __init__(self, regex): + self.regex = regex - def search(self, line): - return self.regex.search(line) + def __deepcopy__(self, memo): + result = copy.copy(self) + result.regex = self.regex + return result - def sub(self, repl, line): - return self.regex.sub(repl, line) + def search(self, line): + return self.regex.search(line) - def pattern(self): - return self.regex.pattern + def sub(self, repl, line): + return self.regex.sub(repl, line) - def flags(self): - return self.regex.flags + def pattern(self): + return self.regex.pattern + + def flags(self): + return self.regex.flags class Filter(Regex): - """Augment a Regex object with a flag indicating whether a match should be + """Augment a Regex object with a flag indicating whether a match should be added (!is_filter_out) or removed (is_filter_out) from the generated checks. - """ - def __init__(self, regex, is_filter_out): - super(Filter, self).__init__(regex) - self.is_filter_out = is_filter_out + """ - def __deepcopy__(self, memo): - result = copy.deepcopy(super(Filter, self), memo) - result.is_filter_out = copy.deepcopy(self.is_filter_out, memo) - return result + def __init__(self, regex, is_filter_out): + super(Filter, self).__init__(regex) + self.is_filter_out = is_filter_out + + def __deepcopy__(self, memo): + result = copy.deepcopy(super(Filter, self), memo) + result.is_filter_out = copy.deepcopy(self.is_filter_out, memo) + return result def parse_commandline_args(parser): - class RegexAction(argparse.Action): - """Add a regular expression option value to a list of regular expressions. 
- This compiles the expression, wraps it in a Regex and adds it to the option - value list.""" - def __init__(self, option_strings, dest, nargs=None, **kwargs): - if nargs is not None: - raise ValueError('nargs not allowed') - super(RegexAction, self).__init__(option_strings, dest, **kwargs) - - def do_call(self, namespace, values, flags): - value_list = getattr(namespace, self.dest) - if value_list is None: - value_list = [] - - try: - value_list.append(Regex(re.compile(values, flags))) - except re.error as error: - raise ValueError('{}: Invalid regular expression \'{}\' ({})'.format( - option_string, error.pattern, error.msg)) - - setattr(namespace, self.dest, value_list) - - def __call__(self, parser, namespace, values, option_string=None): - self.do_call(namespace, values, 0) - - class FilterAction(RegexAction): - """Add a filter to a list of filter option values.""" - def __init__(self, option_strings, dest, nargs=None, **kwargs): - super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs) - - def __call__(self, parser, namespace, values, option_string=None): - super(FilterAction, self).__call__(parser, namespace, values, option_string) - - value_list = getattr(namespace, self.dest) - - is_filter_out = ( option_string == '--filter-out' ) - - value_list[-1] = Filter(value_list[-1].regex, is_filter_out) - - setattr(namespace, self.dest, value_list) - - filter_group = parser.add_argument_group( - 'filtering', - """Filters are applied to each output line according to the order given. 
The - first matching filter terminates filter processing for that current line.""") - - filter_group.add_argument('--filter', action=FilterAction, dest='filters', - metavar='REGEX', - help='Only include lines matching REGEX (may be specified multiple times)') - filter_group.add_argument('--filter-out', action=FilterAction, dest='filters', - metavar='REGEX', - help='Exclude lines matching REGEX') - - parser.add_argument('--include-generated-funcs', action='store_true', - help='Output checks for functions not in source') - parser.add_argument('-v', '--verbose', action='store_true', - help='Show verbose output') - parser.add_argument('-u', '--update-only', action='store_true', - help='Only update test if it was already autogened') - parser.add_argument('--force-update', action='store_true', - help='Update test even if it was autogened by a different script') - parser.add_argument('--enable', action='store_true', dest='enabled', default=True, - help='Activate CHECK line generation from this point forward') - parser.add_argument('--disable', action='store_false', dest='enabled', - help='Deactivate CHECK line generation from this point forward') - parser.add_argument('--replace-value-regex', nargs='+', default=[], - help='List of regular expressions to replace matching value names') - parser.add_argument('--prefix-filecheck-ir-name', default='', - help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names') - parser.add_argument('--global-value-regex', nargs='+', default=[], - help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)') - parser.add_argument('--global-hex-value-regex', nargs='+', default=[], - help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives') - # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. 
At that point, - # we need to rename the flag to just -generate-body-for-unused-prefixes. - parser.add_argument('--no-generate-body-for-unused-prefixes', - action='store_false', - dest='gen_unused_prefix_body', - default=True, - help='Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.') - args = parser.parse_args() - global _verbose, _global_value_regex, _global_hex_value_regex - _verbose = args.verbose - _global_value_regex = args.global_value_regex - _global_hex_value_regex = args.global_hex_value_regex - return args + class RegexAction(argparse.Action): + """Add a regular expression option value to a list of regular expressions. + This compiles the expression, wraps it in a Regex and adds it to the option + value list.""" + + def __init__(self, option_strings, dest, nargs=None, **kwargs): + if nargs is not None: + raise ValueError("nargs not allowed") + super(RegexAction, self).__init__(option_strings, dest, **kwargs) + + def do_call(self, namespace, values, flags): + value_list = getattr(namespace, self.dest) + if value_list is None: + value_list = [] + + try: + value_list.append(Regex(re.compile(values, flags))) + except re.error as error: + raise ValueError( + "{}: Invalid regular expression '{}' ({})".format( + "/".join(self.option_strings), error.pattern, error.msg + ) + ) + + setattr(namespace, self.dest, value_list) + + def __call__(self, parser, namespace, values, option_string=None): + self.do_call(namespace, values, 0) + + class FilterAction(RegexAction): + """Add a filter to a list of filter option values.""" + + def __init__(self, option_strings, dest, nargs=None, **kwargs): + super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + super(FilterAction, self).__call__(parser, namespace, values, option_string) + + value_list = getattr(namespace,
self.dest) + + is_filter_out = option_string == "--filter-out" + + value_list[-1] = Filter(value_list[-1].regex, is_filter_out) + + setattr(namespace, self.dest, value_list) + + filter_group = parser.add_argument_group( + "filtering", + """Filters are applied to each output line according to the order given. The + first matching filter terminates filter processing for that current line.""", + ) + + filter_group.add_argument( + "--filter", + action=FilterAction, + dest="filters", + metavar="REGEX", + help="Only include lines matching REGEX (may be specified multiple times)", + ) + filter_group.add_argument( + "--filter-out", + action=FilterAction, + dest="filters", + metavar="REGEX", + help="Exclude lines matching REGEX", + ) + + parser.add_argument( + "--include-generated-funcs", + action="store_true", + help="Output checks for functions not in source", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Show verbose output" + ) + parser.add_argument( + "-u", + "--update-only", + action="store_true", + help="Only update test if it was already autogened", + ) + parser.add_argument( + "--force-update", + action="store_true", + help="Update test even if it was autogened by a different script", + ) + parser.add_argument( + "--enable", + action="store_true", + dest="enabled", + default=True, + help="Activate CHECK line generation from this point forward", + ) + parser.add_argument( + "--disable", + action="store_false", + dest="enabled", + help="Deactivate CHECK line generation from this point forward", + ) + parser.add_argument( + "--replace-value-regex", + nargs="+", + default=[], + help="List of regular expressions to replace matching value names", + ) + parser.add_argument( + "--prefix-filecheck-ir-name", + default="", + help="Add a prefix to FileCheck IR value names to avoid conflicts with scripted names", + ) + parser.add_argument( + "--global-value-regex", + nargs="+", + default=[], + help="List of regular expressions that a global value 
declaration must match to generate a check (has no effect if checking globals is not enabled)", + ) + parser.add_argument( + "--global-hex-value-regex", + nargs="+", + default=[], + help="List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives", + ) + # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point, + # we need to rename the flag to just -generate-body-for-unused-prefixes. + parser.add_argument( + "--no-generate-body-for-unused-prefixes", + action="store_false", + dest="gen_unused_prefix_body", + default=True, + help="Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.", + ) + # This is the default when regenerating existing tests. The default when + # generating new tests is determined by DEFAULT_VERSION. + parser.add_argument( + "--version", type=int, default=1, help="The version of output format" + ) + + # Generalize Call instructions to hide type suffixes + parser.add_argument( + "--generalize-calls", + action="store_true", + default=False, + help="Generalize call instructions in IR by not checking for exact type suffixes.", + ) + args = parser.parse_args() + # TODO: This should not be handled differently from the other options + global _verbose, _global_value_regex, _global_hex_value_regex + _verbose = args.verbose + _global_value_regex = args.global_value_regex + _global_hex_value_regex = args.global_hex_value_regex + return args + +def parse_args(parser, argv): + args = parser.parse_args(argv) + if args.version >= 2: + args.function_signature = True + # TODO: This should not be handled differently from the other options + global _verbose, _global_value_regex, _global_hex_value_regex + _verbose = args.verbose + _global_value_regex = args.global_value_regex + _global_hex_value_regex = 
args.global_hex_value_regex + if "check_globals" in args and args.check_globals == "default": + args.check_globals = "none" if args.version < 4 else "smart" + return args class InputLineInfo(object): - def __init__(self, line, line_number, args, argv): - self.line = line - self.line_number = line_number - self.args = args - self.argv = argv + def __init__(self, line, line_number, args, argv): + self.line = line + self.line_number = line_number + self.args = args + self.argv = argv class TestInfo(object): - def __init__(self, test, parser, script_name, input_lines, args, argv, - comment_prefix, argparse_callback): - self.parser = parser - self.argparse_callback = argparse_callback - self.path = test - self.args = args - if args.prefix_filecheck_ir_name: - global _prefix_filecheck_ir_name - _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name - self.argv = argv - self.input_lines = input_lines - self.run_lines = find_run_lines(test, self.input_lines) - self.comment_prefix = comment_prefix - if self.comment_prefix is None: - if self.path.endswith('.mir'): - self.comment_prefix = '#' - else: - self.comment_prefix = ';' - self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT - self.test_autogenerated_note = self.autogenerated_note_prefix + script_name - self.test_autogenerated_note += get_autogennote_suffix(parser, self.args) - self.test_unused_note = self.comment_prefix + self.comment_prefix + ' ' + UNUSED_NOTE - - def ro_iterlines(self): - for line_num, input_line in enumerate(self.input_lines): - args, argv = check_for_command(input_line, self.parser, - self.args, self.argv, self.argparse_callback) - yield InputLineInfo(input_line, line_num, args, argv) - - def iterlines(self, output_lines): - output_lines.append(self.test_autogenerated_note) - for line_info in self.ro_iterlines(): - input_line = line_info.line - # Discard any previous script advertising. 
- if input_line.startswith(self.autogenerated_note_prefix): - continue - self.args = line_info.args - self.argv = line_info.argv - if not self.args.enabled: - output_lines.append(input_line) - continue - yield line_info - - def get_checks_for_unused_prefixes(self, run_list, used_prefixes: List[str]) -> List[str]: - unused_prefixes = set( - [prefix for sublist in run_list for prefix in sublist[0]]).difference(set(used_prefixes)) - - ret = [] - if not unused_prefixes: - return ret - ret.append(self.test_unused_note) - for unused in sorted(unused_prefixes): - ret.append('{comment} {prefix}: {match_everything}'.format( - comment=self.comment_prefix, - prefix=unused, - match_everything=r"""{{.*}}""" - )) - return ret - -def itertests(test_patterns, parser, script_name, comment_prefix=None, - comment_prefix_callback=None, argparse_callback=None): - assert comment_prefix is None or comment_prefix_callback is None - for pattern in test_patterns: - # On Windows we must expand the patterns ourselves. - tests_list = glob.glob(pattern) - if not tests_list: - warn("Test file pattern '%s' was not found. Ignoring it." 
% (pattern,)) - continue - for test in tests_list: - with open(test) as f: - input_lines = [l.rstrip() for l in f] - args = parser.parse_args() - if argparse_callback is not None: - argparse_callback(args) - argv = sys.argv[:] - first_line = input_lines[0] if input_lines else "" - if UTC_ADVERT in first_line: - if script_name not in first_line and not args.force_update: - warn("Skipping test which wasn't autogenerated by " + script_name, test) - continue - args, argv = check_for_command(first_line, parser, args, argv, argparse_callback) - elif args.update_only: - assert UTC_ADVERT not in first_line - warn("Skipping test which isn't autogenerated: " + test) - continue - final_input_lines = [] - for l in input_lines: - if UNUSED_NOTE in l: - break - final_input_lines.append(l) - if comment_prefix_callback is not None: - comment_prefix = comment_prefix_callback(test, final_input_lines) - yield TestInfo(test, parser, script_name, final_input_lines, args, argv, - comment_prefix, argparse_callback) - -def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False, comment_marker = ';'): - # Skip any blank comment lines in the IR. - if not skip_global_checks and input_line.strip() == comment_marker: - return False - # Skip a special double comment line we use as a separator. - if input_line.strip() == comment_marker + SEPARATOR: - return False - # Skip any blank lines in the IR. - #if input_line.strip() == '': - # return False - # And skip any CHECK lines. We're building our own. 
- m = CHECK_RE.match(input_line) - if m and m.group(1) in prefix_set: - if skip_global_checks: - global_ir_value_re = re.compile(r'\[\[', flags=(re.M)) - return not global_ir_value_re.search(input_line) - return False - - return True + def __init__( + self, + test, + parser, + script_name, + input_lines, + args, + argv, + comment_prefix, + argparse_callback, + ): + self.parser = parser + self.argparse_callback = argparse_callback + self.path = test + self.args = args + if args.prefix_filecheck_ir_name: + global _prefix_filecheck_ir_name + _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name + self.argv = argv + self.input_lines = input_lines + self.run_lines = find_run_lines(test, self.input_lines) + self.comment_prefix = comment_prefix + if self.comment_prefix is None: + if self.path.endswith(".mir") or self.path.endswith(".txt"): + self.comment_prefix = "#" + elif self.path.endswith(".s"): + self.comment_prefix = "//" + else: + self.comment_prefix = ";" + self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT + self.test_autogenerated_note = self.autogenerated_note_prefix + script_name + self.test_autogenerated_note += get_autogennote_suffix(parser, self.args) + self.test_unused_note = ( + self.comment_prefix + self.comment_prefix + " " + UNUSED_NOTE + ) + + def ro_iterlines(self): + for line_num, input_line in enumerate(self.input_lines): + args, argv = check_for_command( + input_line, self.parser, self.args, self.argv, self.argparse_callback + ) + yield InputLineInfo(input_line, line_num, args, argv) + + def iterlines(self, output_lines): + output_lines.append(self.test_autogenerated_note) + for line_info in self.ro_iterlines(): + input_line = line_info.line + # Discard any previous script advertising. 
+ if input_line.startswith(self.autogenerated_note_prefix): + continue + self.args = line_info.args + self.argv = line_info.argv + if not self.args.enabled: + output_lines.append(input_line) + continue + yield line_info + + def get_checks_for_unused_prefixes( + self, run_list, used_prefixes: List[str] + ) -> List[str]: + run_list = [element for element in run_list if element[0] is not None] + unused_prefixes = set( + [prefix for sublist in run_list for prefix in sublist[0]] + ).difference(set(used_prefixes)) + + ret = [] + if not unused_prefixes: + return ret + ret.append(self.test_unused_note) + for unused in sorted(unused_prefixes): + ret.append( + "{comment} {prefix}: {match_everything}".format( + comment=self.comment_prefix, + prefix=unused, + match_everything=r"""{{.*}}""", + ) + ) + return ret + +def itertests( + test_patterns, + parser, + script_name, + comment_prefix=None, + comment_prefix_callback=None, + argparse_callback=None, +): + assert comment_prefix is None or comment_prefix_callback is None + for pattern in test_patterns: + # On Windows we must expand the patterns ourselves. + tests_list = glob.glob(pattern) + if not tests_list: + warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,)) + continue + for test in tests_list: + with open(test) as f: + input_lines = [l.rstrip() for l in f] + first_line = input_lines[0] if input_lines else "" + if UTC_AVOID in first_line: + warn("Skipping test that must not be autogenerated: " + test) + continue + is_regenerate = UTC_ADVERT in first_line + + # If we're generating a new test, set the default version to the latest. 
+ argv = sys.argv[:] + if not is_regenerate: + argv.insert(1, "--version=" + str(DEFAULT_VERSION)) + + args = parse_args(parser, argv[1:]) + if argparse_callback is not None: + argparse_callback(args) + if is_regenerate: + if script_name not in first_line and not args.force_update: + warn( + "Skipping test which wasn't autogenerated by " + script_name, + test, + ) + continue + args, argv = check_for_command( + first_line, parser, args, argv, argparse_callback + ) + elif args.update_only: + assert UTC_ADVERT not in first_line + warn("Skipping test which isn't autogenerated: " + test) + continue + final_input_lines = [] + for l in input_lines: + if UNUSED_NOTE in l: + break + final_input_lines.append(l) + if comment_prefix_callback is not None: + comment_prefix = comment_prefix_callback(test, final_input_lines) + yield TestInfo( + test, + parser, + script_name, + final_input_lines, + args, + argv, + comment_prefix, + argparse_callback, + ) + +def should_add_line_to_output( + input_line, + prefix_set, + *, + skip_global_checks=False, + skip_same_checks=False, + comment_marker=";", +): + # Skip any blank comment lines in the IR. + if not skip_global_checks and input_line.strip() == comment_marker: + return False + # Skip a special double comment line we use as a separator. + if input_line.strip() == comment_marker + SEPARATOR: + return False + # Skip any blank lines in the IR. + # if input_line.strip() == '': + # return False + # And skip any CHECK lines. We're building our own. 
+ m = CHECK_RE.match(input_line) + if m and m.group(1) in prefix_set: + if skip_same_checks and CHECK_SAME_RE.match(input_line): + # The previous CHECK line was removed, so don't leave this dangling + return False + if skip_global_checks: + # Skip checks only if they are of global value definitions + global_ir_value_re = re.compile(r"(\[\[|@)", flags=(re.M)) + is_global = global_ir_value_re.search(input_line) + return not is_global + return False + + return True + +def collect_original_check_lines(ti: TestInfo, prefix_set: set): + """ + Collect pre-existing check lines into a dictionary `result` which is + returned. + + result[func_name][prefix] is filled with a list of right-hand-sides of check + lines. + """ + result = collections.defaultdict(lambda: {}) + + current_prefix = None + current_function = None + for input_line_info in ti.ro_iterlines(): + input_line = input_line_info.line + if input_line.lstrip().startswith(";"): + m = CHECK_RE.match(input_line) + if m is not None: + prefix = m.group(1) + check_kind = m.group(2) + line = input_line[m.end() :].strip() + + if prefix != current_prefix: + current_function = None + current_prefix = None + + if check_kind not in ["LABEL", "SAME"]: + if current_function is not None: + current_function.append(line) + continue + + if check_kind == "SAME": + continue + + if check_kind == "LABEL": + m = IR_FUNCTION_LABEL_RE.match(line) + if m is not None: + func_name = m.group(1) + if ( + ti.args.function is not None + and func_name != ti.args.function + ): + # When filtering on a specific function, skip all others. 
+ continue + + current_prefix = prefix + current_function = result[func_name][prefix] = [] + continue + + current_function = None + + return result # Perform lit-like substitutions def getSubstitutions(sourcepath): - sourcedir = os.path.dirname(sourcepath) - return [('%s', sourcepath), - ('%S', sourcedir), - ('%p', sourcedir), - ('%{pathsep}', os.pathsep)] + sourcedir = os.path.dirname(sourcepath) + return [ + ("%s", sourcepath), + ("%S", sourcedir), + ("%p", sourcedir), + ("%{pathsep}", os.pathsep), + ] def applySubstitutions(s, substitutions): - for a,b in substitutions: - s = s.replace(a, b) - return s + for a, b in substitutions: + s = s.replace(a, b) + return s # Invoke the tool that is being tested. def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): - with open(ir) as ir_file: - substitutions = getSubstitutions(ir) - - # TODO Remove the str form which is used by update_test_checks.py and - # update_llc_test_checks.py - # The safer list form is used by update_cc_test_checks.py - if preprocess_cmd: - # Allow pre-processing the IR file (e.g. 
using sed): - assert isinstance(preprocess_cmd, str) # TODO: use a list instead of using shell - preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip() - if verbose: - print('Pre-processing input file: ', ir, " with command '", - preprocess_cmd, "'", sep="", file=sys.stderr) - # Python 2.7 doesn't have subprocess.DEVNULL: - with open(os.devnull, 'w') as devnull: - pp = subprocess.Popen(preprocess_cmd, shell=True, stdin=devnull, - stdout=subprocess.PIPE) - ir_file = pp.stdout - - if isinstance(cmd_args, list): - args = [applySubstitutions(a, substitutions) for a in cmd_args] - stdout = subprocess.check_output([exe] + args, stdin=ir_file) - else: - stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions), - shell=True, stdin=ir_file) - if sys.version_info[0] > 2: - # FYI, if you crashed here with a decode error, your run line probably - # results in bitcode or other binary format being written to the pipe. - # For an opt test, you probably want to add -S or -disable-output. - stdout = stdout.decode() - # Fix line endings to unix CR style. - return stdout.replace('\r\n', '\n') + with open(ir) as ir_file: + substitutions = getSubstitutions(ir) + + # TODO Remove the str form which is used by update_test_checks.py and + # update_llc_test_checks.py + # The safer list form is used by update_cc_test_checks.py + if preprocess_cmd: + # Allow pre-processing the IR file (e.g. 
using sed): + assert isinstance( + preprocess_cmd, str + ) # TODO: use a list instead of using shell + preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip() + if verbose: + print( + "Pre-processing input file: ", + ir, + " with command '", + preprocess_cmd, + "'", + sep="", + file=sys.stderr, + ) + pp = subprocess.Popen( + preprocess_cmd, + shell=True, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + ) + ir_file = pp.stdout + + if isinstance(cmd_args, list): + args = [applySubstitutions(a, substitutions) for a in cmd_args] + stdout = subprocess.check_output([exe] + args, stdin=ir_file) + else: + stdout = subprocess.check_output( + exe + " " + applySubstitutions(cmd_args, substitutions), + shell=True, + stdin=ir_file, + ) + if sys.version_info[0] > 2: + # FYI, if you crashed here with a decode error, your run line probably + # results in bitcode or other binary format being written to the pipe. + # For an opt test, you probably want to add -S or -disable-output. + stdout = stdout.decode() + # Fix line endings to unix CR style. + return stdout.replace("\r\n", "\n") # Invoke the tool that is being tested, without any preprocessing and without # support for pipes. 
def invoke_tool_only(exe, cmd_args, ir, verbose=False): - substitutions = getSubstitutions(ir) + substitutions = getSubstitutions(ir) - if isinstance(cmd_args, list): - args = [applySubstitutions(a, substitutions) for a in cmd_args] - stdout = subprocess.check_output([exe] + args) - else: - stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions), - shell=True) + if isinstance(cmd_args, list): + args = [applySubstitutions(a, substitutions) for a in cmd_args] + stdout = subprocess.check_output([exe] + args) + else: + stdout = subprocess.check_output( + exe + " " + applySubstitutions(cmd_args, substitutions), shell=True + ) - # FYI, if you crashed here with a decode error, your run line probably - # results in bitcode or other binary format being written to the pipe. - # For an opt test, you probably want to add -S or -disable-output. - stdout = stdout.decode() + # FYI, if you crashed here with a decode error, your run line probably + # results in bitcode or other binary format being written to the pipe. + # For an opt test, you probably want to add -S or -disable-output. + stdout = stdout.decode() - # Fix line endings to unix CR style. - return stdout.replace('\r\n', '\n') + # Fix line endings to unix CR style. + return stdout.replace("\r\n", "\n") ##### LLVM IR parser -RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$') -CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)') -PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$') -CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:') - -UTC_ARGS_KEY = 'UTC_ARGS:' -UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + '\s*(?P<cmd>.*)\s*$') -UTC_ADVERT = 'NOTE: Assertions have been autogenerated by ' -UNUSED_NOTE = 'NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line:' +RUN_LINE_RE = re.compile(r"^\s*(?://|[;#])\s*RUN:\s*(.*)$") +CHECK_PREFIX_RE = re.compile(r"--?check-prefix(?:es)?[= ](\S+)") +PREFIX_RE = re.compile("^[a-zA-Z0-9_-]+$") +CHECK_RE = re.compile( + r"^\s*(?://|[;#])\s*([^:]+?)(?:-(NEXT|NOT|DAG|LABEL|SAME|EMPTY))?:" +) +CHECK_SAME_RE = re.compile(r"^\s*(?://|[;#])\s*([^:]+?)(?:-SAME)?:") + +UTC_ARGS_KEY = "UTC_ARGS:" +UTC_ARGS_CMD = re.compile(r".*" + UTC_ARGS_KEY + r"\s*(?P<cmd>.*)\s*$") +UTC_ADVERT = "NOTE: Assertions have been autogenerated by " +UTC_AVOID = "NOTE: Do not autogenerate" +UNUSED_NOTE = "NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:" OPT_FUNCTION_RE = re.compile( - r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*' - r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$', - flags=(re.M | re.S)) + r"^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?P<funcdef_attrs_and_ret>[^@]*)@(?P<func>[\w.$-]+?)\s*" + r"(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$", + flags=(re.M | re.S), +) ANALYZE_FUNCTION_RE = re.compile( - r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':' - r'\s*\n(?P<body>.*)$', - flags=(re.X | re.S)) + r"^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':" + r"\s*\n(?P<body>.*)$", + flags=(re.X | re.S), +) -LV_DEBUG_RE = re.compile( - r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*' - r'\s*\n(?P<body>.*)$', - flags=(re.X | re.S)) +LOOP_PASS_DEBUG_RE = re.compile( + r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S) +) IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(') +IR_FUNCTION_LABEL_RE = re.compile( + r'^\s*(?:define\s+(?:internal\s+)?[^@]*)?@"?([\w.$-]+)"?\s*\(' +) TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$') -TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)') -MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)') -DEBUG_ONLY_ARG_RE = re.compile(r'-debug-only[= ]([^ ]+)') - -SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)') 
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M) -SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M) +TRIPLE_ARG_RE = re.compile(r"-m?triple[= ]([^ ]+)") +MARCH_ARG_RE = re.compile(r"-march[= ]([^ ]+)") +DEBUG_ONLY_ARG_RE = re.compile(r"-debug-only[= ]([^ ]+)") + +SCRUB_LEADING_WHITESPACE_RE = re.compile(r"^(\s+)") +SCRUB_WHITESPACE_RE = re.compile(r"(?!^(| \w))[ \t]+", flags=re.M) +SCRUB_PRESERVE_LEADING_WHITESPACE_RE = re.compile(r"((?!^)[ \t]*(\S))[ \t]+") +SCRUB_TRAILING_WHITESPACE_RE = re.compile(r"[ \t]+$", flags=re.M) SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE -SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M) -SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n') +SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile( + r"([ \t]|(#[0-9]+))+$", flags=re.M +) +SCRUB_KILL_COMMENT_RE = re.compile(r"^ *#+ +kill:.*\n") SCRUB_LOOP_COMMENT_RE = re.compile( - r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M) -SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M) + r"# =>This Inner Loop Header:.*|# in Loop:.*", flags=re.M +) +SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r"(?<=\S)+[ \t]*#$", flags=re.M) + +# Generalize calls +TYPE_RE = r"(p\d+)?(v\d+)?([ifp]\d+|half|float|double|void|ptr)" +TYPE_SUFFIX_RE = r"([._]" + TYPE_RE + ")+" +CALL_INST_RE = re.compile( + r"call " + r".*" + r" @.*" + r"(?P<suffix>" + TYPE_SUFFIX_RE + r")" +) -SEPARATOR = '.' +SEPARATOR = "." 
def error(msg, test_file=None): - if test_file: - msg = '{}: {}'.format(msg, test_file) - print('ERROR: {}'.format(msg), file=sys.stderr) + if test_file: + msg = "{}: {}".format(msg, test_file) + print("ERROR: {}".format(msg), file=sys.stderr) def warn(msg, test_file=None): - if test_file: - msg = '{}: {}'.format(msg, test_file) - print('WARNING: {}'.format(msg), file=sys.stderr) + if test_file: + msg = "{}: {}".format(msg, test_file) + print("WARNING: {}".format(msg), file=sys.stderr) def debug(*args, **kwargs): - # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs): - if 'file' not in kwargs: - kwargs['file'] = sys.stderr - if _verbose: - print(*args, **kwargs) + # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs): + if "file" not in kwargs: + kwargs["file"] = sys.stderr + if _verbose: + print(*args, **kwargs) def find_run_lines(test, lines): - debug('Scanning for RUN lines in test file:', test) - raw_lines = [m.group(1) - for m in [RUN_LINE_RE.match(l) for l in lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith('\\'): - run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l - else: - run_lines.append(l) - debug('Found {} RUN lines in {}:'.format(len(run_lines), test)) - for l in run_lines: - debug(' RUN: {}'.format(l)) - return run_lines + debug("Scanning for RUN lines in test file:", test) + raw_lines = [m.group(1) for m in [RUN_LINE_RE.match(l) for l in lines] if m] + run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] + for l in raw_lines[1:]: + if run_lines[-1].endswith("\\"): + run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l + else: + run_lines.append(l) + debug("Found {} RUN lines in {}:".format(len(run_lines), test)) + for l in run_lines: + debug(" RUN: {}".format(l)) + return run_lines def get_triple_from_march(march): - triples = { - 'amdgcn': 'amdgcn', - 'r600': 'r600', - 'mips': 'mips', - 'sparc': 'sparc', - 'hexagon': 'hexagon', - 've': 
've', - } - for prefix, triple in triples.items(): - if march.startswith(prefix): - return triple - print("Cannot find a triple. Assume 'x86'", file=sys.stderr) - return 'x86' + triples = { + "amdgcn": "amdgcn", + "r600": "r600", + "mips": "mips", + "nvptx64": "nvptx64", + "sparc": "sparc", + "hexagon": "hexagon", + "ve": "ve", + } + for prefix, triple in triples.items(): + if march.startswith(prefix): + return triple + print("Cannot find a triple. Assume 'x86'", file=sys.stderr) + return "x86" def apply_filters(line, filters): - has_filter = False - for f in filters: - if not f.is_filter_out: - has_filter = True - if f.search(line): - return False if f.is_filter_out else True - # If we only used filter-out, keep the line, otherwise discard it since no - # filter matched. - return False if has_filter else True + has_filter = False + for f in filters: + if not f.is_filter_out: + has_filter = True + if f.search(line): + return False if f.is_filter_out else True + # If we only used filter-out, keep the line, otherwise discard it since no + # filter matched. + return False if has_filter else True def do_filter(body, filters): - return body if not filters else '\n'.join(filter( - lambda line: apply_filters(line, filters), body.splitlines())) + return ( + body + if not filters + else "\n".join( + filter(lambda line: apply_filters(line, filters), body.splitlines()) + ) + ) def scrub_body(body): - # Scrub runs of whitespace out of the assembly, but leave the leading - # whitespace in place. - body = SCRUB_WHITESPACE_RE.sub(r' ', body) - # Expand the tabs used for indentation. - body = str.expandtabs(body, 2) - # Strip trailing whitespace. - body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body) - return body + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + body = SCRUB_PRESERVE_LEADING_WHITESPACE_RE.sub(lambda m: m.group(2) + " ", body) + + # Expand the tabs used for indentation. 
+ body = str.expandtabs(body, 2) + # Strip trailing whitespace. + body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r"", body) + return body def do_scrub(body, scrubber, scrubber_args, extra): - if scrubber_args: - local_args = copy.deepcopy(scrubber_args) - local_args[0].extra_scrub = extra - return scrubber(body, *local_args) - return scrubber(body, *scrubber_args) + if scrubber_args: + local_args = copy.deepcopy(scrubber_args) + local_args[0].extra_scrub = extra + return scrubber(body, *local_args) + return scrubber(body, *scrubber_args) # Build up a dictionary of all the function bodies. class function_body(object): - def __init__(self, string, extra, args_and_sig, attrs, func_name_separator): - self.scrub = string - self.extrascrub = extra - self.args_and_sig = args_and_sig - self.attrs = attrs - self.func_name_separator = func_name_separator - def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_backend): - arg_names = set() - def drop_arg_names(match): - arg_names.add(match.group(variable_group_in_ir_value_match)) - if match.group(attribute_group_in_ir_value_match): - attr = match.group(attribute_group_in_ir_value_match) - else: - attr = '' - return match.group(1) + attr + match.group(match.lastindex) - def repl_arg_names(match): - if match.group(variable_group_in_ir_value_match) is not None and match.group(variable_group_in_ir_value_match) in arg_names: - return match.group(1) + match.group(match.lastindex) - return match.group(1) + match.group(2) + match.group(match.lastindex) - if self.attrs != attrs: - return False - ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig) - ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig) - if ans0 != ans1: - return False - if is_backend: - # Check without replacements, the replacements are not applied to the - # body for backend checks. 
- return self.extrascrub == extrascrub - - es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub) - es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub) - es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0) - es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1) - return es0 == es1 - - def __str__(self): - return self.scrub + def __init__( + self, + string, + extra, + funcdef_attrs_and_ret, + args_and_sig, + attrs, + func_name_separator, + ginfo, + ): + self.scrub = string + self.extrascrub = extra + self.funcdef_attrs_and_ret = funcdef_attrs_and_ret + self.args_and_sig = args_and_sig + self.attrs = attrs + self.func_name_separator = func_name_separator + self._ginfo = ginfo + + def is_same_except_arg_names( + self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs + ): + arg_names = set() + + def drop_arg_names(match): + nameless_value = self._ginfo.get_nameless_value_from_match(match) + if nameless_value.check_key == "%": + arg_names.add(self._ginfo.get_name_from_match(match)) + substitute = "" + else: + substitute = match.group(2) + return match.group(1) + substitute + match.group(match.lastindex) + + def repl_arg_names(match): + nameless_value = self._ginfo.get_nameless_value_from_match(match) + if ( + nameless_value.check_key == "%" + and self._ginfo.get_name_from_match(match) in arg_names + ): + return match.group(1) + match.group(match.lastindex) + return match.group(1) + match.group(2) + match.group(match.lastindex) + + if self.funcdef_attrs_and_ret != funcdef_attrs_and_ret: + return False + if self.attrs != attrs: + return False + + regexp = self._ginfo.get_regexp() + ans0 = regexp.sub(drop_arg_names, self.args_and_sig) + ans1 = regexp.sub(drop_arg_names, args_and_sig) + if ans0 != ans1: + return False + if self._ginfo.is_asm(): + # Check without replacements, the replacements are not applied to the + # body for backend checks. 
+ return self.extrascrub == extrascrub + + es0 = regexp.sub(repl_arg_names, self.extrascrub) + es1 = regexp.sub(repl_arg_names, extrascrub) + es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0) + es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1) + return es0 == es1 + + def __str__(self): + return self.scrub class FunctionTestBuilder: - def __init__(self, run_list, flags, scrubber_args, path): - self._verbose = flags.verbose - self._record_args = flags.function_signature - self._check_attributes = flags.check_attributes - # Strip double-quotes if input was read by UTC_ARGS - self._filters = list(map(lambda f: Filter(re.compile(f.pattern().strip('"'), - f.flags()), - f.is_filter_out), - flags.filters)) if flags.filters else [] - self._scrubber_args = scrubber_args - self._path = path - # Strip double-quotes if input was read by UTC_ARGS - self._replace_value_regex = list(map(lambda x: x.strip('"'), flags.replace_value_regex)) - self._func_dict = {} - self._func_order = {} - self._global_var_dict = {} - self._processed_prefixes = set() - for tuple in run_list: - for prefix in tuple[0]: - self._func_dict.update({prefix:dict()}) - self._func_order.update({prefix: []}) - self._global_var_dict.update({prefix:dict()}) - - def finish_and_get_func_dict(self): - for prefix in self.get_failed_prefixes(): - warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,)) - return self._func_dict - - def func_order(self): - return self._func_order - - def global_var_dict(self): - return self._global_var_dict - - def is_filtered(self): - return bool(self._filters) - - def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_backend): - build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes) - for m in function_re.finditer(raw_tool_output): - if not m: - continue - func = m.group('func') - body = m.group('body') - # func_name_separator is the string that is placed right after function name at the - # 
beginning of assembly function definition. In most assemblies, that is just a - # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is - # False, just assume that separator is an empty string. - if is_backend: - # Use ':' as default separator. - func_name_separator = m.group('func_name_separator') if 'func_name_separator' in m.groupdict() else ':' - else: - func_name_separator = '' - attrs = m.group('attrs') if self._check_attributes else '' - # Determine if we print arguments, the opening brace, or nothing after the - # function name - if self._record_args and 'args_and_sig' in m.groupdict(): - args_and_sig = scrub_body(m.group('args_and_sig').strip()) - elif 'args_and_sig' in m.groupdict(): - args_and_sig = '(' - else: - args_and_sig = '' - filtered_body = do_filter(body, self._filters) - scrubbed_body = do_scrub(filtered_body, scrubber, self._scrubber_args, - extra=False) - scrubbed_extra = do_scrub(filtered_body, scrubber, self._scrubber_args, - extra=True) - if 'analysis' in m.groupdict(): - analysis = m.group('analysis') - if analysis.lower() != 'cost model analysis': - warn('Unsupported analysis mode: %r!' % (analysis,)) - if func.startswith('stress'): - # We only use the last line of the function body for stress tests. - scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:]) - if self._verbose: - print('Processing function: ' + func, file=sys.stderr) - for l in scrubbed_body.splitlines(): - print(' ' + l, file=sys.stderr) - for prefix in prefixes: - # Replace function names matching the regex. - for regex in self._replace_value_regex: - # Pattern that matches capture groups in the regex in leftmost order. - group_regex = re.compile(r'\(.*?\)') - # Replace function name with regex. - match = re.match(regex, func) - if match: - func_repl = regex - # Replace any capture groups with their matched strings. 
- for g in match.groups(): - func_repl = group_regex.sub(re.escape(g), func_repl, count=1) - func = re.sub(func_repl, '{{' + func_repl + '}}', func) - - # Replace all calls to regex matching functions. - matches = re.finditer(regex, scrubbed_body) - for match in matches: - func_repl = regex - # Replace any capture groups with their matched strings. - for g in match.groups(): - func_repl = group_regex.sub(re.escape(g), func_repl, count=1) - # Substitute function call names that match the regex with the same - # capture groups set. - scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}', - scrubbed_body) - - if func in self._func_dict[prefix]: - if (self._func_dict[prefix][func] is not None and - (str(self._func_dict[prefix][func]) != scrubbed_body or - self._func_dict[prefix][func].args_and_sig != args_and_sig or - self._func_dict[prefix][func].attrs != attrs)): - if self._func_dict[prefix][func].is_same_except_arg_names( - scrubbed_extra, - args_and_sig, - attrs, - is_backend): - self._func_dict[prefix][func].scrub = scrubbed_extra - self._func_dict[prefix][func].args_and_sig = args_and_sig + def __init__(self, run_list, flags, scrubber_args, path, ginfo): + self._verbose = flags.verbose + self._record_args = flags.function_signature + self._check_attributes = flags.check_attributes + # Strip double-quotes if input was read by UTC_ARGS + self._filters = ( + list( + map( + lambda f: Filter( + re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out + ), + flags.filters, + ) + ) + if flags.filters + else [] + ) + self._scrubber_args = scrubber_args + self._path = path + self._ginfo = ginfo + # Strip double-quotes if input was read by UTC_ARGS + self._replace_value_regex = list( + map(lambda x: x.strip('"'), flags.replace_value_regex) + ) + self._func_dict = {} + self._func_order = {} + self._global_var_dict = {} + self._processed_prefixes = set() + for tuple in run_list: + for prefix in tuple[0]: + self._func_dict.update({prefix: dict()}) + 
self._func_order.update({prefix: []}) + self._global_var_dict.update({prefix: dict()}) + + def finish_and_get_func_dict(self): + for prefix in self.get_failed_prefixes(): + warn( + "Prefix %s had conflicting output from different RUN lines for all functions in test %s" + % ( + prefix, + self._path, + ) + ) + return self._func_dict + + def func_order(self): + return self._func_order + + def global_var_dict(self): + return self._global_var_dict + + def is_filtered(self): + return bool(self._filters) + + def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes): + build_global_values_dictionary( + self._global_var_dict, raw_tool_output, prefixes, self._ginfo + ) + for m in function_re.finditer(raw_tool_output): + if not m: + continue + func = m.group("func") + body = m.group("body") + # func_name_separator is the string that is placed right after function name at the + # beginning of assembly function definition. In most assemblies, that is just a + # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is + # False, just assume that separator is an empty string. + if self._ginfo.is_asm(): + # Use ':' as default separator. + func_name_separator = ( + m.group("func_name_separator") + if "func_name_separator" in m.groupdict() + else ":" + ) else: - # This means a previous RUN line produced a body for this function - # that is different from the one produced by this current RUN line, - # so the body can't be common across RUN lines. We use None to - # indicate that. - self._func_dict[prefix][func] = None - else: - if prefix not in self._processed_prefixes: - self._func_dict[prefix][func] = function_body( - scrubbed_body, scrubbed_extra, args_and_sig, attrs, - func_name_separator) - self._func_order[prefix].append(func) - else: - # An earlier RUN line used this check prefixes but didn't produce - # a body for this function. 
This happens in Clang tests that use - # preprocessor directives to exclude individual functions from some - # RUN lines. - self._func_dict[prefix][func] = None - - def processed_prefixes(self, prefixes): - """ - Mark a set of prefixes as having had at least one applicable RUN line fully - processed. This is used to filter out function bodies that don't have - outputs for all RUN lines. - """ - self._processed_prefixes.update(prefixes) - - def get_failed_prefixes(self): - # This returns the list of those prefixes that failed to match any function, - # because there were conflicting bodies produced by different RUN lines, in - # all instances of the prefix. - for prefix in self._func_dict: - if (self._func_dict[prefix] and - (not [fct for fct in self._func_dict[prefix] - if self._func_dict[prefix][fct] is not None])): - yield prefix + func_name_separator = "" + attrs = m.group("attrs") if self._check_attributes else "" + funcdef_attrs_and_ret = ( + m.group("funcdef_attrs_and_ret") if self._record_args else "" + ) + # Determine if we print arguments, the opening brace, or nothing after the + # function name + if self._record_args and "args_and_sig" in m.groupdict(): + args_and_sig = scrub_body(m.group("args_and_sig").strip()) + elif "args_and_sig" in m.groupdict(): + args_and_sig = "(" + else: + args_and_sig = "" + filtered_body = do_filter(body, self._filters) + scrubbed_body = do_scrub( + filtered_body, scrubber, self._scrubber_args, extra=False + ) + scrubbed_extra = do_scrub( + filtered_body, scrubber, self._scrubber_args, extra=True + ) + if "analysis" in m.groupdict(): + analysis = m.group("analysis") + if analysis not in SUPPORTED_ANALYSES: + warn("Unsupported analysis mode: %r!" % (analysis,)) + if func.startswith("stress"): + # We only use the last line of the function body for stress tests. 
+ scrubbed_body = "\n".join(scrubbed_body.splitlines()[-1:]) + if self._verbose: + print("Processing function: " + func, file=sys.stderr) + for l in scrubbed_body.splitlines(): + print(" " + l, file=sys.stderr) + for prefix in prefixes: + # Replace function names matching the regex. + for regex in self._replace_value_regex: + # Pattern that matches capture groups in the regex in leftmost order. + group_regex = re.compile(r"\(.*?\)") + # Replace function name with regex. + match = re.match(regex, func) + if match: + func_repl = regex + # Replace any capture groups with their matched strings. + for g in match.groups(): + func_repl = group_regex.sub( + re.escape(g), func_repl, count=1 + ) + func = re.sub(func_repl, "{{" + func_repl + "}}", func) + + # Replace all calls to regex matching functions. + matches = re.finditer(regex, scrubbed_body) + for match in matches: + func_repl = regex + # Replace any capture groups with their matched strings. + for g in match.groups(): + func_repl = group_regex.sub( + re.escape(g), func_repl, count=1 + ) + # Substitute function call names that match the regex with the same + # capture groups set. 
+ scrubbed_body = re.sub( + func_repl, "{{" + func_repl + "}}", scrubbed_body + ) + + if func in self._func_dict[prefix]: + if self._func_dict[prefix][func] is not None and ( + str(self._func_dict[prefix][func]) != scrubbed_body + or self._func_dict[prefix][func].args_and_sig != args_and_sig + or self._func_dict[prefix][func].attrs != attrs + or self._func_dict[prefix][func].funcdef_attrs_and_ret + != funcdef_attrs_and_ret + ): + if self._func_dict[prefix][func].is_same_except_arg_names( + scrubbed_extra, + funcdef_attrs_and_ret, + args_and_sig, + attrs, + ): + self._func_dict[prefix][func].scrub = scrubbed_extra + self._func_dict[prefix][func].args_and_sig = args_and_sig + else: + # This means a previous RUN line produced a body for this function + # that is different from the one produced by this current RUN line, + # so the body can't be common across RUN lines. We use None to + # indicate that. + self._func_dict[prefix][func] = None + else: + if prefix not in self._processed_prefixes: + self._func_dict[prefix][func] = function_body( + scrubbed_body, + scrubbed_extra, + funcdef_attrs_and_ret, + args_and_sig, + attrs, + func_name_separator, + self._ginfo, + ) + self._func_order[prefix].append(func) + else: + # An earlier RUN line used this check prefixes but didn't produce + # a body for this function. This happens in Clang tests that use + # preprocessor directives to exclude individual functions from some + # RUN lines. + self._func_dict[prefix][func] = None + + def processed_prefixes(self, prefixes): + """ + Mark a set of prefixes as having had at least one applicable RUN line fully + processed. This is used to filter out function bodies that don't have + outputs for all RUN lines. + """ + self._processed_prefixes.update(prefixes) + + def get_failed_prefixes(self): + # This returns the list of those prefixes that failed to match any function, + # because there were conflicting bodies produced by different RUN lines, in + # all instances of the prefix. 
+ for prefix in self._func_dict: + if self._func_dict[prefix] and ( + not [ + fct + for fct in self._func_dict[prefix] + if self._func_dict[prefix][fct] is not None + ] + ): + yield prefix ##### Generator of LLVM IR CHECK lines -SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*') +SCRUB_IR_COMMENT_RE = re.compile(r"\s*;.*") # TODO: We should also derive check lines for global, debug, loop declarations, etc.. class NamelessValue: - def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp, - ir_regexp, global_ir_rhs_regexp, is_before_functions, *, - is_number=False, replace_number_with_counter=False): - self.check_prefix = check_prefix - self.check_key = check_key - self.ir_prefix = ir_prefix - self.global_ir_prefix = global_ir_prefix - self.global_ir_prefix_regexp = global_ir_prefix_regexp - self.ir_regexp = ir_regexp - self.global_ir_rhs_regexp = global_ir_rhs_regexp - self.is_before_functions = is_before_functions - self.is_number = is_number - # Some variable numbers (e.g. MCINST1234) will change based on unrelated - # modifications to LLVM, replace those with an incrementing counter. - self.replace_number_with_counter = replace_number_with_counter - self.variable_mapping = {} - - # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'. - def is_local_def_ir_value_match(self, match): - return self.ir_prefix == '%' - - # Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'. - def is_global_scope_ir_value_match(self, match): - return self.global_ir_prefix is not None - - # Return the IR prefix and check prefix we use for this kind or IR value, - # e.g., (%, TMP) for locals. - def get_ir_prefix_from_ir_value_match(self, match): - if self.ir_prefix and match.group(0).strip().startswith(self.ir_prefix): - return self.ir_prefix, self.check_prefix - return self.global_ir_prefix, self.check_prefix - - # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? 
for locals - def get_ir_regex_from_ir_value_re_match(self, match): - # for backwards compatibility we check locals with '.*' - if self.is_local_def_ir_value_match(match): - return '.*' - if self.ir_prefix and match.group(0).strip().startswith(self.ir_prefix): - return self.ir_regexp - return self.global_ir_prefix_regexp - - # Create a FileCheck variable name based on an IR name. - def get_value_name(self, var: str, check_prefix: str): - var = var.replace('!', '') - if self.replace_number_with_counter: - assert var.isdigit(), var - replacement = self.variable_mapping.get(var, None) - if replacement is None: - # Replace variable with an incrementing counter - replacement = str(len(self.variable_mapping) + 1) - self.variable_mapping[var] = replacement - var = replacement - # This is a nameless value, prepend check_prefix. - if var.isdigit(): - var = check_prefix + var - else: - # This is a named value that clashes with the check_prefix, prepend with - # _prefix_filecheck_ir_name, if it has been defined. - if may_clash_with_default_check_prefix_name(check_prefix, var) and _prefix_filecheck_ir_name: - var = _prefix_filecheck_ir_name + var - var = var.replace('.', '_') - var = var.replace('-', '_') - return var.upper() - - # Create a FileCheck variable from regex. - def get_value_definition(self, var, match): - # for backwards compatibility we check locals with '.*' - varname = self.get_value_name(var, self.check_prefix) - prefix = self.get_ir_prefix_from_ir_value_match(match)[0] - if self.is_number: - regex = '' # always capture a number in the default format - capture_start = '[[#' - else: - regex = self.get_ir_regex_from_ir_value_re_match(match) - capture_start = '[[' - if self.is_local_def_ir_value_match(match): - return capture_start + varname + ':' + prefix + regex + ']]' - return prefix + capture_start + varname + ':' + regex + ']]' - - # Use a FileCheck variable. 
- def get_value_use(self, var, match, var_prefix=None): - if var_prefix is None: - var_prefix = self.check_prefix - capture_start = '[[#' if self.is_number else '[[' - if self.is_local_def_ir_value_match(match): - return capture_start + self.get_value_name(var, var_prefix) + ']]' - prefix = self.get_ir_prefix_from_ir_value_match(match)[0] - return prefix + capture_start + self.get_value_name(var, var_prefix) + ']]' - -# Description of the different "unnamed" values we match in the IR, e.g., -# (local) ssa values, (debug) metadata, etc. -ir_nameless_values = [ - NamelessValue(r'TMP' , '%' , r'%' , None , None , r'[\w$.-]+?' , None , False) , - NamelessValue(r'ATTR' , '#' , r'#' , None , None , r'[0-9]+' , None , False) , - NamelessValue(r'ATTR' , '#' , None , r'attributes #' , r'[0-9]+' , None , r'{[^}]*}' , False) , - NamelessValue(r'GLOB' , '@' , r'@' , None , None , r'[0-9]+' , None , False) , - NamelessValue(r'GLOB' , '@' , None , r'@' , r'[a-zA-Z0-9_$"\\.-]+' , None , r'.+' , True) , - NamelessValue(r'DBG' , '!' , r'!dbg ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'PROF' , '!' , r'!prof ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'TBAA' , '!' , r'!tbaa ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'RNG' , '!' , r'!range ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'META' , '!' , r'metadata ' , None , None , r'![0-9]+' , None , False) , - NamelessValue(r'META' , '!' , None , r'' , r'![0-9]+' , None , r'(?:distinct |)!.*' , False) , - NamelessValue(r'ACC_GRP' , '!' , r'!llvm.access.group ' , None , None , r'![0-9]+' , None , False) , -] + """ + A NamelessValue object represents a type of value in the IR whose "name" we + generalize in the generated check lines; where the "name" could be an actual + name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12` + or `!4`). 
+ """ -asm_nameless_values = [ - NamelessValue(r'MCINST', 'Inst#', None, '\s]|\Z)' -ASM_VALUE_RE = re.compile(r'((?:#|//)\s*)' + '(' + ASM_VALUE_REGEXP_STRING + ')' + ASM_VALUE_REGEXP_SUFFIX) - -# The entire match is group 0, the prefix has one group (=1), the entire -# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. -first_nameless_group_in_ir_value_match = 3 - -# constants for the group id of special matches -variable_group_in_ir_value_match = 3 -attribute_group_in_ir_value_match = 4 - -# Check a match for IR_VALUE_RE and inspect it to determine if it was a local -# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above. -def get_idx_from_ir_value_match(match): - for i in range(first_nameless_group_in_ir_value_match, match.lastindex): - if match.group(i) is not None: - return i - first_nameless_group_in_ir_value_match - error("Unable to identify the kind of IR value from the match!") - return 0 - -# See get_idx_from_ir_value_match -def get_name_from_ir_value_match(match): - return match.group(get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match) - -def get_nameless_value_from_match(match, nameless_values) -> NamelessValue: - return nameless_values[get_idx_from_ir_value_match(match)] + MODE_IR = 0 + MODE_ASM = 1 + MODE_ANALYZE = 2 + + def __init__( + self, + version, + mode, + nameless_values: List[NamelessValue], + regexp_prefix, + regexp_suffix, + ): + self._version = version + self._mode = mode + self._nameless_values = nameless_values + + self._regexp_prefix = regexp_prefix + self._regexp_suffix = regexp_suffix + + self._regexp, _ = self._build_regexp(False, False) + ( + self._unstable_globals_regexp, + self._unstable_globals_values, + ) = self._build_regexp(True, True) + + def _build_regexp(self, globals_only, unstable_only): + matches = [] + values = [] + for nameless_value in self._nameless_values: + is_global = nameless_value.global_ir_rhs_regexp is not None + if globals_only and 
not is_global: + continue + if unstable_only and nameless_value.match_literally: + continue + + match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})" + if self.is_ir() and not globals_only and is_global: + match = "^" + match + matches.append(match) + values.append(nameless_value) + + regexp_string = r"|".join(matches) + + return ( + re.compile( + self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix + ), + values, + ) + + def get_version(self): + return self._version + + def is_ir(self): + return self._mode == GeneralizerInfo.MODE_IR + + def is_asm(self): + return self._mode == GeneralizerInfo.MODE_ASM + + def is_analyze(self): + return self._mode == GeneralizerInfo.MODE_ANALYZE + + def get_nameless_values(self): + return self._nameless_values + + def get_regexp(self): + return self._regexp + + def get_unstable_globals_regexp(self): + return self._unstable_globals_regexp + + # The entire match is group 0, the prefix has one group (=1), the entire + # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start. 
+ FIRST_NAMELESS_GROUP_IN_MATCH = 3 + + def get_match_info(self, match): + """ + Returns (name, nameless_value) for the given match object + """ + if match.re == self._regexp: + values = self._nameless_values + else: + match.re == self._unstable_globals_regexp + values = self._unstable_globals_values + for i in range(len(values)): + g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH) + if g is not None: + return g, values[i] + error("Unable to identify the kind of IR value from the match!") + return None, None + + # See get_idx_from_match + def get_name_from_match(self, match): + return self.get_match_info(match)[0] + + def get_nameless_value_from_match(self, match) -> NamelessValue: + return self.get_match_info(match)[1] + +def make_ir_generalizer(version): + values = [] + + if version >= 5: + values += [ + NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None), + NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"), + ] + + values += [ + # check_prefix check_key ir_prefix ir_regexp global_ir_rhs_regexp + NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None), + NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None), + NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"), + NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None), + NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True), + NamelessValue( + r"GLOBNAMED", + "@", + r"@", + r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*", + r".+", + is_before_functions=True, + match_literally=True, + interlaced_with_previous=True, + ), + NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None), + NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None), + NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None), + NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None), + NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None), + NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None), + NamelessValue(r"LOOP", "!", 
r"!llvm.loop ", r"![0-9]+", None), + NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"), + NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None), + NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None), + NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None), + ] + + prefix = r"(\s*)" + suffix = r"([,\s\(\)\}]|\Z)" + + # values = [ + # nameless_value + # for nameless_value in IR_NAMELESS_VALUES + # if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and + # not (unstable_ids_only and nameless_value.match_literally) + # ] + + return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix) + +def make_asm_generalizer(version): + values = [ + NamelessValue( + r"MCINST", + "Inst#", + "\s]|\Z)" + + return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix) + +def make_analyze_generalizer(version): + values = [ + NamelessValue( + r"GRP", + "#", + r"", + r"0x[0-9a-f]+", + None, + replace_number_with_counter=True, + ), + ] + + prefix = r"(\s*)" + suffix = r"(\)?:)" + + return GeneralizerInfo( + version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix + ) # Return true if var clashes with the scripted FileCheck check_prefix. def may_clash_with_default_check_prefix_name(check_prefix, var): - return check_prefix and re.match(r'^' + check_prefix + r'[0-9]+?$', var, re.IGNORECASE) - -def generalize_check_lines_common(lines, is_analyze, vars_seen, - global_vars_seen, nameless_values, - nameless_value_regex, is_asm): - # This gets called for each match that occurs in - # a line. We transform variables we haven't seen - # into defs, and variables we have seen into uses. 
- def transform_line_vars(match): - var = get_name_from_ir_value_match(match) - nameless_value = get_nameless_value_from_match(match, nameless_values) - if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var): - warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict" - " with scripted FileCheck name." % (var,)) - key = (var, nameless_value.check_key) - is_local_def = nameless_value.is_local_def_ir_value_match(match) - if is_local_def and key in vars_seen: - rv = nameless_value.get_value_use(var, match) - elif not is_local_def and key in global_vars_seen: - # We could have seen a different prefix for the global variables first, - # ensure we use that one instead of the prefix for the current match. - rv = nameless_value.get_value_use(var, match, global_vars_seen[key]) - else: - if is_local_def: - vars_seen.add(key) - else: - global_vars_seen[key] = nameless_value.check_prefix - rv = nameless_value.get_value_definition(var, match) - # re.sub replaces the entire regex match - # with whatever you return, so we have - # to make sure to hand it back everything - # including the commas and spaces. - return match.group(1) + rv + match.group(match.lastindex) - - lines_with_def = [] - - for i, line in enumerate(lines): - if not is_asm: - # An IR variable named '%.' matches the FileCheck regex string. - line = line.replace('%.', '%dot') - for regex in _global_hex_value_regex: - if re.match('^@' + regex + ' = ', line): - line = re.sub(r'\bi([0-9]+) ([0-9]+)', - lambda m : 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]', - line) - break - # Ignore any comments, since the check lines will too. - scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line) - lines[i] = scrubbed_line - if is_asm or not is_analyze: - # It can happen that two matches are back-to-back and for some reason sub - # will not replace both of them. For now we work around this by - # substituting until there is no more match. 
- changed = True - while changed: - (lines[i], changed) = nameless_value_regex.subn(transform_line_vars, - lines[i], count=1) - return lines - -# Replace IR value defs and uses with FileCheck variables. -def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen): - return generalize_check_lines_common(lines, is_analyze, vars_seen, - global_vars_seen, ir_nameless_values, - IR_VALUE_RE, False) - -def generalize_asm_check_lines(lines, vars_seen, global_vars_seen): - return generalize_check_lines_common(lines, False, vars_seen, - global_vars_seen, asm_nameless_values, - ASM_VALUE_RE, True) - -def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_backend, is_analyze, global_vars_seen_dict, is_filtered): - # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. - prefix_exclusions = set() - printed_prefixes = [] - for p in prefix_list: - checkprefixes = p[0] - # If not all checkprefixes of this run line produced the function we cannot check for it as it does not - # exist for this run line. A subset of the check prefixes might know about the function but only because - # other run lines created it. - if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)): - prefix_exclusions |= set(checkprefixes) - continue - - # prefix_exclusions is constructed, we can now emit the output - for p in prefix_list: - global_vars_seen = {} - checkprefixes = p[0] - for checkprefix in checkprefixes: - if checkprefix in global_vars_seen_dict: - global_vars_seen.update(global_vars_seen_dict[checkprefix]) - else: - global_vars_seen_dict[checkprefix] = {} - if checkprefix in printed_prefixes: - break - - # Check if the prefix is excluded. - if checkprefix in prefix_exclusions: - continue - - # If we do not have output for this prefix we skip it. 
- if not func_dict[checkprefix][func_name]: - continue - - # Add some space between different check prefixes, but not after the last - # check line (before the test code). - if is_backend: - if len(printed_prefixes) != 0: - output_lines.append(comment_marker) - - if checkprefix not in global_vars_seen_dict: - global_vars_seen_dict[checkprefix] = {} - - global_vars_seen_before = [key for key in global_vars_seen.keys()] - - vars_seen = set() - printed_prefixes.append(checkprefix) - attrs = str(func_dict[checkprefix][func_name].attrs) - attrs = '' if attrs == 'None' else attrs - if attrs: - output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs)) - args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) - if args_and_sig: - args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0] - func_name_separator = func_dict[checkprefix][func_name].func_name_separator - if '[[' in args_and_sig: - output_lines.append(check_label_format % (checkprefix, func_name, '', func_name_separator)) - output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig)) - else: - output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig, func_name_separator)) - func_body = str(func_dict[checkprefix][func_name]).splitlines() - if not func_body: - # We have filtered everything. - continue - - # For ASM output, just emit the check lines. - if is_backend: - body_start = 1 - if is_filtered: - # For filtered output we don't add "-NEXT" so don't add extra spaces - # before the first line. - body_start = 0 + return check_prefix and re.match( + r"^" + check_prefix + r"[0-9]+?$", var, re.IGNORECASE + ) + +def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]: + """ + Find a large ordered matching between strings in lhs and rhs. + + Think of this as finding the *unchanged* lines in a diff, where the entries + of lhs and rhs are lines of the files being diffed. 
+ + Returns a list of matched (lhs_idx, rhs_idx) pairs. + """ + + if not lhs or not rhs: + return [] + + # Collect matches in reverse order. + matches = [] + + # First, collect a set of candidate matching edges. We limit this to a + # constant multiple of the input size to avoid quadratic runtime. + patterns = collections.defaultdict(lambda: ([], [])) + + for idx in range(len(lhs)): + patterns[lhs[idx]][0].append(idx) + for idx in range(len(rhs)): + patterns[rhs[idx]][1].append(idx) + + multiple_patterns = [] + + candidates = [] + for pattern in patterns.values(): + if not pattern[0] or not pattern[1]: + continue + + if len(pattern[0]) == len(pattern[1]) == 1: + candidates.append((pattern[0][0], pattern[1][0])) else: - output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) - func_lines = generalize_asm_check_lines(func_body[body_start:], - vars_seen, global_vars_seen) - for func_line in func_lines: - if func_line.strip() == '': - output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix)) - else: - check_suffix = '-NEXT' if not is_filtered else '' - output_lines.append('%s %s%s: %s' % (comment_marker, checkprefix, - check_suffix, func_line)) - # Remember new global variables we have not seen before - for key in global_vars_seen: - if key not in global_vars_seen_before: - global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] - break - - # For IR output, change all defs to FileCheck variables, so we're immune - # to variable naming fashions. - func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen) - - # This could be selectively enabled with an optional invocation argument. - # Disabled for now: better to check everything. Be safe rather than sorry. - - # Handle the first line of the function body as a special case because - # it's often just noise (a useless asm comment or entry label). 
- #if func_body[0].startswith("#") or func_body[0].startswith("entry:"): - # is_blank_line = True - #else: - # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) - # is_blank_line = False - - is_blank_line = False - - for func_line in func_body: - if func_line.strip() == '': - is_blank_line = True - continue - # Do not waste time checking IR comments. - func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line) - - # Skip blank lines instead of checking them. - if is_blank_line: - output_lines.append('{} {}: {}'.format( - comment_marker, checkprefix, func_line)) + multiple_patterns.append(pattern) + + multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1])) + + for pattern in multiple_patterns: + if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * ( + len(lhs) + len(rhs) + ): + break + for lhs_idx in pattern[0]: + for rhs_idx in pattern[1]: + candidates.append((lhs_idx, rhs_idx)) + + if not candidates: + # The LHS and RHS either share nothing in common, or lines are just too + # identical. In that case, let's give up and not match anything. + return [] + + # Compute a maximal crossing-free matching via an algorithm that is + # inspired by a mixture of dynamic programming and line-sweeping in + # discrete geometry. + # + # I would be surprised if this algorithm didn't exist somewhere in the + # literature, but I found it without consciously recalling any + # references, so you'll have to make do with the explanation below. + # Sorry. + # + # The underlying graph is bipartite: + # - nodes on the LHS represent lines in the original check + # - nodes on the RHS represent lines in the new (updated) check + # + # Nodes are implicitly sorted by the corresponding line number. + # Edges (unique_matches) are sorted by the line number on the LHS. + # + # Here's the geometric intuition for the algorithm. 
+ # + # * Plot the edges as points in the plane, with the original line + # number on the X axis and the updated line number on the Y axis. + # * The goal is to find a longest "chain" of points where each point + # is strictly above and to the right of the previous point. + # * The algorithm proceeds by sweeping a vertical line from left to + # right. + # * The algorithm maintains a table where `table[N]` answers the + # question "What is currently the 'best' way to build a chain of N+1 + # points to the left of the vertical line". Here, 'best' means + # that the last point of the chain is a as low as possible (minimal + # Y coordinate). + # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of + # the last point in the chain and `y` is its Y coordinate + # * A key invariant is that the Y values in the table are + # monotonically increasing + # * Thanks to these properties, the table can be used to answer the + # question "What is the longest chain that can be built to the left + # of the vertical line using only points below a certain Y value", + # using a binary search over the table. + # * The algorithm also builds a backlink structure in which every point + # links back to the previous point on a best (longest) chain ending + # at that point + # + # The core loop of the algorithm sweeps the line and updates the table + # and backlink structure for every point that we cross during the sweep. + # Therefore, the algorithm is trivially O(M log M) in the number of + # points. + candidates.sort(key=lambda candidate: (candidate[0], -candidate[1])) + + backlinks = [] + table_rhs_idx = [] + table_candidate_idx = [] + for _, rhs_idx in candidates: + candidate_idx = len(backlinks) + ti = bisect.bisect_left(table_rhs_idx, rhs_idx) + + # Update the table to record a best chain ending in the current point. 
+ # There always is one, and if any of the previously visited points had + # a higher Y coordinate, then there is always a previously recorded best + # chain that can be improved upon by using the current point. + # + # There is only one case where there is some ambiguity. If the + # pre-existing entry table[ti] has the same Y coordinate / rhs_idx as + # the current point (this can only happen if the same line appeared + # multiple times on the LHS), then we could choose to keep the + # previously recorded best chain instead. That would bias the algorithm + # differently but should have no systematic impact on the quality of the + # result. + if ti < len(table_rhs_idx): + table_rhs_idx[ti] = rhs_idx + table_candidate_idx[ti] = candidate_idx else: - check_suffix = '-NEXT' if not is_filtered else '' - output_lines.append('{} {}{}: {}'.format( - comment_marker, checkprefix, check_suffix, func_line)) - is_blank_line = False - - # Add space between different check prefixes and also before the first - # line of code in the test function. - output_lines.append(comment_marker) - - # Remember new global variables we have not seen before - for key in global_vars_seen: - if key not in global_vars_seen_before: - global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] - break - return printed_prefixes - -def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, preserve_names, function_sig, - global_vars_seen_dict, is_filtered): - # Label format is based on IR string. 
- function_def_regex = 'define {{[^@]+}}' if function_sig else '' - check_label_format = '{} %s-LABEL: {}@%s%s%s'.format(comment_marker, function_def_regex) - return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, preserve_names, global_vars_seen_dict, - is_filtered) - -def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered): - check_label_format = '{} %s-LABEL: \'%s%s%s\''.format(comment_marker) - global_vars_seen_dict = {} - return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, - check_label_format, False, True, global_vars_seen_dict, - is_filtered) - -def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): - for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values): - if nameless_value.global_ir_prefix is None: - continue - - lhs_re_str = nameless_value.global_ir_prefix + nameless_value.global_ir_prefix_regexp - rhs_re_str = nameless_value.global_ir_rhs_regexp - - global_ir_value_re_str = r'^' + lhs_re_str + r'\s=\s' + rhs_re_str + r'$' - global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) - lines = [] - for m in global_ir_value_re.finditer(raw_tool_output): - lines.append(m.group(0)) - - for prefix in prefixes: - if glob_val_dict[prefix] is None: - continue - if nameless_value.check_prefix in glob_val_dict[prefix]: - if lines == glob_val_dict[prefix][nameless_value.check_prefix]: - continue - if prefix == prefixes[-1]: - warn('Found conflicting asm under the same prefix: %r!' 
% (prefix,)) + table_rhs_idx.append(rhs_idx) + table_candidate_idx.append(candidate_idx) + if ti > 0: + backlinks.append(table_candidate_idx[ti - 1]) else: - glob_val_dict[prefix][nameless_value.check_prefix] = None - continue - glob_val_dict[prefix][nameless_value.check_prefix] = lines - -def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions): - printed_prefixes = set() - for nameless_value in ir_nameless_values: - if nameless_value.global_ir_prefix is None: - continue - if nameless_value.is_before_functions != is_before_functions: - continue + backlinks.append(None) + + # Commit to names in the matching by walking the backlinks. Recursively + # attempt to fill in more matches in-between. + match_idx = table_candidate_idx[-1] + while match_idx is not None: + current = candidates[match_idx] + matches.append(current) + match_idx = backlinks[match_idx] + + matches.reverse() + return matches + +VARIABLE_TAG = "[[@@]]" +METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]") +NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$") + +class TestVar: + def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str): + self._nameless_value = nameless_value + + self._prefix = prefix + self._suffix = suffix + + def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str): + if prefix != self._prefix: + self._prefix = "" + if suffix != self._suffix: + self._suffix = "" + + def get_variable_name(self, text): + return self._nameless_value.get_value_name( + text, self._nameless_value.check_prefix + ) + + def get_def(self, name, prefix, suffix): + if self._nameless_value.is_number: + return f"{prefix}[[#{name}:]]{suffix}" + if self._prefix: + assert self._prefix == prefix + prefix = "" + if self._suffix: + assert self._suffix == suffix + suffix = "" + return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}" + + def get_use(self, name, 
prefix, suffix): + if self._nameless_value.is_number: + return f"{prefix}[[#{name}]]{suffix}" + if self._prefix: + assert self._prefix == prefix + prefix = "" + if self._suffix: + assert self._suffix == suffix + suffix = "" + return f"{prefix}[[{name}]]{suffix}" + +class CheckValueInfo: + def __init__( + self, + key, + text, + name: str, + prefix: str, + suffix: str, + ): + # Key for the value, e.g. '%' + self.key = key + + # Text to be matched by the FileCheck variable (without any prefix or suffix) + self.text = text + + # Name of the FileCheck variable + self.name = name + + # Prefix and suffix that were captured by the NamelessValue regular expression + self.prefix = prefix + self.suffix = suffix + +# Represent a check line in a way that allows us to compare check lines while +# ignoring some or all of the FileCheck variable names. +class CheckLineInfo: + def __init__(self, line, values): + # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG + self.line: str = line + + # Information on each FileCheck variable name occurrences in the line + self.values: List[CheckValueInfo] = values + + def __repr__(self): + return f"CheckLineInfo(line={self.line}, self.values={self.values})" + +def remap_metavar_names( + old_line_infos: List[CheckLineInfo], + new_line_infos: List[CheckLineInfo], + committed_names: Set[str], +) -> Mapping[str, str]: + """ + Map all FileCheck variable names that appear in new_line_infos to new + FileCheck variable names in an attempt to reduce the diff from old_line_infos + to new_line_infos. + + This is done by: + * Matching old check lines and new check lines using a diffing algorithm + applied after replacing names with wildcards. 
+ * Committing to variable names such that the matched lines become equal + (without wildcards) if possible + * This is done recursively to handle cases where many lines are equal + after wildcard replacement + """ + # Initialize uncommitted identity mappings + new_mapping = {} + for line in new_line_infos: + for value in line.values: + new_mapping[value.name] = value.name + + # Recursively commit to the identity mapping or find a better one + def recurse(old_begin, old_end, new_begin, new_end): + if old_begin == old_end or new_begin == new_end: + return + + # Find a matching of lines where uncommitted names are replaced + # with a placeholder. + def diffify_line(line, mapper): + values = [] + for value in line.values: + mapped = mapper(value.name) + values.append(mapped if mapped in committed_names else "?") + return line.line.strip() + " @@@ " + " @ ".join(values) + + lhs_lines = [ + diffify_line(line, lambda x: x) + for line in old_line_infos[old_begin:old_end] + ] + rhs_lines = [ + diffify_line(line, lambda x: new_mapping[x]) + for line in new_line_infos[new_begin:new_end] + ] + + candidate_matches = find_diff_matching(lhs_lines, rhs_lines) + + candidate_matches = [ + (old_begin + lhs_idx, new_begin + rhs_idx) + for lhs_idx, rhs_idx in candidate_matches + ] + + # Candidate matches may conflict if they require conflicting mappings of + # names. We want to determine a large set of compatible candidates, + # because that leads to a small diff. + # + # We think of the candidates as vertices in a conflict graph. The + # conflict graph has edges between incompatible candidates. We want to + # find a large independent set in this graph. + # + # Greedily selecting candidates and removing incompatible ones has the + # disadvantage that making few bad decisions early on can have huge + # consequences. + # + # Instead, we implicitly compute multiple independent sets by greedily + # assigning a *coloring* to the conflict graph. 
Then, we select the + # largest color class (which is the largest independent set we found), + # commit to all candidates in it, and recurse. + # + # Note that we don't actually materialize the conflict graph. Instead, + # each color class tracks the information needed to decide implicitly + # whether a vertex conflicts (has an edge to) any of the vertices added + # to the color class so far. + class Color: + def __init__(self): + # (lhs_idx, rhs_idx) of matches in this color + self.matches = [] + + # rhs_name -> lhs_name mappings required by this color + self.mapping = {} + + # lhs_names committed for this color + self.committed = set() + + colors = [] + + for lhs_idx, rhs_idx in candidate_matches: + lhs_line = old_line_infos[lhs_idx] + rhs_line = new_line_infos[rhs_idx] + + # We scan through the uncommitted names in the candidate line and + # filter out the color classes to which the candidate could be + # assigned. + # + # Simultaneously, we prepare a new color class in case the candidate + # conflicts with all colors that have been established so far. + compatible_colors = colors[:] + new_color = Color() + new_color.matches.append((lhs_idx, rhs_idx)) + + for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values): + if new_mapping[rhs_value.name] in committed_names: + # The new value has already been committed. If it was mapped + # to the same name as the original value, we can consider + # committing other values from this line. Otherwise, we + # should ignore this line. + if new_mapping[rhs_value.name] == lhs_value.name: + continue + else: + break + + if rhs_value.name in new_color.mapping: + # Same, but for a possible commit happening on the same line + if new_color.mapping[rhs_value.name] == lhs_value.name: + continue + else: + break + + if ( + lhs_value.name in committed_names + or lhs_value.name in new_color.committed + ): + # We can't map this value because the name we would map it + # to has already been committed for something else. 
Give up + # on this line. + break + + new_color.mapping[rhs_value.name] = lhs_value.name + new_color.committed.add(lhs_value.name) + + color_idx = 0 + while color_idx < len(compatible_colors): + color = compatible_colors[color_idx] + compatible = True + if rhs_value.name in color.mapping: + compatible = color.mapping[rhs_value.name] == lhs_value.name + else: + compatible = lhs_value.name not in color.committed + if compatible: + color_idx += 1 + else: + del compatible_colors[color_idx] + else: + # We never broke out of the loop, which means that at a minimum, + # this line is viable standalone + if compatible_colors: + color = max(compatible_colors, key=lambda color: len(color.matches)) + color.mapping.update(new_color.mapping) + color.committed.update(new_color.committed) + color.matches.append((lhs_idx, rhs_idx)) + else: + colors.append(new_color) + + if colors: + # Pick the largest color class. This gives us a large independent + # (non-conflicting) set of candidate matches. Assign all names + # required by the independent set and recurse. 
+ max_color = max(colors, key=lambda color: len(color.matches)) + + for rhs_var, lhs_var in max_color.mapping.items(): + new_mapping[rhs_var] = lhs_var + committed_names.add(lhs_var) + + if ( + lhs_var != rhs_var + and lhs_var in new_mapping + and new_mapping[lhs_var] == lhs_var + ): + new_mapping[lhs_var] = "conflict_" + lhs_var + + matches = ( + [(old_begin - 1, new_begin - 1)] + + max_color.matches + + [(old_end, new_end)] + ) + + for (lhs_prev, rhs_prev), (lhs_next, rhs_next) in zip(matches, matches[1:]): + recurse(lhs_prev + 1, lhs_next, rhs_prev + 1, rhs_next) + + recurse(0, len(old_line_infos), 0, len(new_line_infos)) + + # Commit to remaining names and resolve conflicts + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + if not mapped_name.startswith("conflict_"): + assert mapped_name == new_name + committed_names.add(mapped_name) + + for new_name, mapped_name in new_mapping.items(): + if mapped_name in committed_names: + continue + assert mapped_name.startswith("conflict_") + + m = NUMERIC_SUFFIX_RE.search(new_name) + base_name = new_name[: m.start()] + suffix = int(new_name[m.start() :]) if m.start() != m.end() else 1 + while True: + candidate = f"{base_name}{suffix}" + if candidate not in committed_names: + new_mapping[new_name] = candidate + committed_names.add(candidate) + break + suffix += 1 + + return new_mapping + +def generalize_check_lines( + lines, + ginfo: GeneralizerInfo, + vars_seen, + global_vars_seen, + preserve_names=False, + original_check_lines=None, + *, + unstable_globals_only=False, + generalize_calls=False, +): + if unstable_globals_only: + regexp = ginfo.get_unstable_globals_regexp() + else: + regexp = ginfo.get_regexp() + + multiple_braces_re = re.compile(r"({{+)|(}}+)") + + def escape_braces(match_obj): + return "{{" + re.escape(match_obj.group(0)) + "}}" + + if ginfo.is_ir(): + for i, line in enumerate(lines): + # An IR variable named '%.' matches the FileCheck regex string. 
+ line = line.replace("%.", "%dot") + + if re.search(CALL_INST_RE, line) and generalize_calls: + line = re.sub(re.compile(TYPE_SUFFIX_RE + r"\("), "{{.*}}(", line) + + for regex in _global_hex_value_regex: + if re.match("^@" + regex + " = ", line): + line = re.sub( + r"\bi([0-9]+) ([0-9]+)", + lambda m: "i" + + m.group(1) + + " [[#" + + hex(int(m.group(2))) + + "]]", + line, + ) + break + # Ignore any comments, since the check lines will too. + scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r"", line) + lines[i] = scrubbed_line + + if not preserve_names: + committed_names = set( + test_var.get_variable_name(name) + for (name, _), test_var in vars_seen.items() + ) + defs = set() + + # Collect information about new check lines, and generalize global reference + new_line_infos = [] + for line in lines: + filtered_line = "" + values = [] + while True: + m = regexp.search(line) + if m is None: + filtered_line += line + break + + name = ginfo.get_name_from_match(m) + nameless_value = ginfo.get_nameless_value_from_match(m) + prefix, suffix = nameless_value.get_affixes_from_match(m) + if may_clash_with_default_check_prefix_name( + nameless_value.check_prefix, name + ): + warn( + "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict" + " with scripted FileCheck name." 
% (name,) + ) + + # Record the variable as seen and (for locals) accumulate + # prefixes/suffixes + is_local_def = nameless_value.is_local_def_ir_value() + if is_local_def: + vars_dict = vars_seen + else: + vars_dict = global_vars_seen + + key = (name, nameless_value.check_key) + + if is_local_def: + test_prefix = prefix + test_suffix = suffix + else: + test_prefix = "" + test_suffix = "" + + if key in vars_dict: + vars_dict[key].seen(nameless_value, test_prefix, test_suffix) + else: + vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix) + defs.add(key) + + var = vars_dict[key].get_variable_name(name) + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. + filtered_line += ( + line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex) + ) + + line = line[m.end() :] + + values.append( + CheckValueInfo( + key=nameless_value.check_key, + text=name, + name=var, + prefix=prefix, + suffix=suffix, + ) + ) + + new_line_infos.append(CheckLineInfo(filtered_line, values)) + + committed_names.update( + test_var.get_variable_name(name) + for (name, _), test_var in global_vars_seen.items() + ) + + # Collect information about original check lines, if any. + orig_line_infos = [] + for line in original_check_lines or []: + filtered_line = "" + values = [] + while True: + m = METAVAR_RE.search(line) + if m is None: + filtered_line += line + break + + # Replace with a [[@@]] tag, but be sure to keep the spaces and commas. 
+ filtered_line += line[: m.start()] + VARIABLE_TAG + line = line[m.end() :] + values.append( + CheckValueInfo( + key=None, + text=None, + name=m.group(1), + prefix="", + suffix="", + ) + ) + orig_line_infos.append(CheckLineInfo(filtered_line, values)) + + # Compute the variable name mapping + mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names) + + # Apply the variable name mapping + for i, line_info in enumerate(new_line_infos): + line_template = line_info.line + line = "" + + for value in line_info.values: + idx = line_template.find(VARIABLE_TAG) + line += line_template[:idx] + line_template = line_template[idx + len(VARIABLE_TAG) :] + + key = (value.text, value.key) + if value.key == "%": + vars_dict = vars_seen + else: + vars_dict = global_vars_seen + + if key in defs: + line += vars_dict[key].get_def( + mapping[value.name], value.prefix, value.suffix + ) + defs.remove(key) + else: + line += vars_dict[key].get_use( + mapping[value.name], value.prefix, value.suffix + ) + + line += line_template + + lines[i] = line + + if ginfo.is_analyze(): + for i, _ in enumerate(lines): + # Escape multiple {{ or }} as {{}} denotes a FileCheck regex. + scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i]) + lines[i] = scrubbed_line + + return lines + +def add_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + check_label_format, + ginfo, + global_vars_seen_dict, + is_filtered, + preserve_names=False, + original_check_lines: Mapping[str, List[str]] = {}, + generalize_calls=False, +): + # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well. 
+ prefix_exclusions = set() + printed_prefixes = [] for p in prefix_list: - global_vars_seen = {} - checkprefixes = p[0] - if checkprefixes is None: - continue - for checkprefix in checkprefixes: - if checkprefix in global_vars_seen_dict: - global_vars_seen.update(global_vars_seen_dict[checkprefix]) - else: - global_vars_seen_dict[checkprefix] = {} - if (checkprefix, nameless_value.check_prefix) in printed_prefixes: - break - if not glob_val_dict[checkprefix]: - continue - if nameless_value.check_prefix not in glob_val_dict[checkprefix]: - continue - if not glob_val_dict[checkprefix][nameless_value.check_prefix]: - continue - - check_lines = [] - global_vars_seen_before = [key for key in global_vars_seen.keys()] - for line in glob_val_dict[checkprefix][nameless_value.check_prefix]: - if _global_value_regex: - matched = False - for regex in _global_value_regex: - if re.match('^@' + regex + ' = ', line): - matched = True + checkprefixes = p[0] + # If not all checkprefixes of this run line produced the function we cannot check for it as it does not + # exist for this run line. A subset of the check prefixes might know about the function but only because + # other run lines created it. 
+ if any( + map( + lambda checkprefix: func_name not in func_dict[checkprefix], + checkprefixes, + ) + ): + prefix_exclusions |= set(checkprefixes) + continue + + # prefix_exclusions is constructed, we can now emit the output + for p in prefix_list: + global_vars_seen = {} + checkprefixes = p[0] + for checkprefix in checkprefixes: + if checkprefix in global_vars_seen_dict: + global_vars_seen.update(global_vars_seen_dict[checkprefix]) + else: + global_vars_seen_dict[checkprefix] = {} + if checkprefix in printed_prefixes: break - if not matched: - continue - tmp = generalize_check_lines([line], is_analyze, set(), global_vars_seen) - check_line = '%s %s: %s' % (comment_marker, checkprefix, tmp[0]) - check_lines.append(check_line) - if not check_lines: - continue - output_lines.append(comment_marker + SEPARATOR) - for check_line in check_lines: - output_lines.append(check_line) + # Check if the prefix is excluded. + if checkprefix in prefix_exclusions: + continue + + # If we do not have output for this prefix we skip it. + if not func_dict[checkprefix][func_name]: + continue + + # Add some space between different check prefixes, but not after the last + # check line (before the test code). 
+ if ginfo.is_asm(): + if len(printed_prefixes) != 0: + output_lines.append(comment_marker) + + if checkprefix not in global_vars_seen_dict: + global_vars_seen_dict[checkprefix] = {} + + global_vars_seen_before = [key for key in global_vars_seen.keys()] + + vars_seen = {} + printed_prefixes.append(checkprefix) + attrs = str(func_dict[checkprefix][func_name].attrs) + attrs = "" if attrs == "None" else attrs + if ginfo.get_version() > 1: + funcdef_attrs_and_ret = func_dict[checkprefix][ + func_name + ].funcdef_attrs_and_ret + else: + funcdef_attrs_and_ret = "" + + if attrs: + output_lines.append( + "%s %s: Function Attrs: %s" % (comment_marker, checkprefix, attrs) + ) + args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) + if args_and_sig: + args_and_sig = generalize_check_lines( + [args_and_sig], + ginfo, + vars_seen, + global_vars_seen, + preserve_names, + original_check_lines=[], + generalize_calls=generalize_calls, + )[0] + func_name_separator = func_dict[checkprefix][func_name].func_name_separator + if "[[" in args_and_sig: + # Captures in label lines are not supported, thus split into a -LABEL + # and a separate -SAME line that contains the arguments with captures. + args_and_sig_prefix = "" + if ginfo.get_version() >= 3 and args_and_sig.startswith("("): + # Ensure the "(" separating function name and arguments is in the + # label line. This is required in case of function names that are + # prefixes of each other. Otherwise, the label line for "foo" might + # incorrectly match on "foo.specialized". + args_and_sig_prefix = args_and_sig[0] + args_and_sig = args_and_sig[1:] + + # Removing args_and_sig from the label match line requires + # func_name_separator to be empty. Otherwise, the match will not work. 
+ assert func_name_separator == "" + output_lines.append( + check_label_format + % ( + checkprefix, + funcdef_attrs_and_ret, + func_name, + args_and_sig_prefix, + func_name_separator, + ) + ) + output_lines.append( + "%s %s-SAME: %s" % (comment_marker, checkprefix, args_and_sig) + ) + else: + output_lines.append( + check_label_format + % ( + checkprefix, + funcdef_attrs_and_ret, + func_name, + args_and_sig, + func_name_separator, + ) + ) + func_body = str(func_dict[checkprefix][func_name]).splitlines() + if not func_body: + # We have filtered everything. + continue + + # For ASM output, just emit the check lines. + if ginfo.is_asm(): + body_start = 1 + if is_filtered: + # For filtered output we don't add "-NEXT" so don't add extra spaces + # before the first line. + body_start = 0 + else: + output_lines.append( + "%s %s: %s" % (comment_marker, checkprefix, func_body[0]) + ) + func_lines = generalize_check_lines( + func_body[body_start:], ginfo, vars_seen, global_vars_seen + ) + for func_line in func_lines: + if func_line.strip() == "": + output_lines.append( + "%s %s-EMPTY:" % (comment_marker, checkprefix) + ) + else: + check_suffix = "-NEXT" if not is_filtered else "" + output_lines.append( + "%s %s%s: %s" + % (comment_marker, checkprefix, check_suffix, func_line) + ) + # Remember new global variables we have not seen before + for key in global_vars_seen: + if key not in global_vars_seen_before: + global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] + break + # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well. 
+ elif ginfo.is_analyze(): + func_body = generalize_check_lines( + func_body, ginfo, vars_seen, global_vars_seen + ) + for func_line in func_body: + if func_line.strip() == "": + output_lines.append( + "{} {}-EMPTY:".format(comment_marker, checkprefix) + ) + else: + check_suffix = "-NEXT" if not is_filtered else "" + output_lines.append( + "{} {}{}: {}".format( + comment_marker, checkprefix, check_suffix, func_line + ) + ) + + # Add space between different check prefixes and also before the first + # line of code in the test function. + output_lines.append(comment_marker) + + # Remember new global variables we have not seen before + for key in global_vars_seen: + if key not in global_vars_seen_before: + global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] + break + # For IR output, change all defs to FileCheck variables, so we're immune + # to variable naming fashions. + else: + func_body = generalize_check_lines( + func_body, + ginfo, + vars_seen, + global_vars_seen, + preserve_names, + original_check_lines=original_check_lines.get(checkprefix), + generalize_calls=generalize_calls, + ) + + # This could be selectively enabled with an optional invocation argument. + # Disabled for now: better to check everything. Be safe rather than sorry. + + # Handle the first line of the function body as a special case because + # it's often just noise (a useless asm comment or entry label). + # if func_body[0].startswith("#") or func_body[0].startswith("entry:"): + # is_blank_line = True + # else: + # output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0])) + # is_blank_line = False + + is_blank_line = False + + for func_line in func_body: + if func_line.strip() == "": + is_blank_line = True + continue + # Do not waste time checking IR comments. + func_line = SCRUB_IR_COMMENT_RE.sub(r"", func_line) + + # Skip blank lines instead of checking them. 
+ if is_blank_line: + output_lines.append( + "{} {}: {}".format( + comment_marker, checkprefix, func_line + ) + ) + else: + check_suffix = "-NEXT" if not is_filtered else "" + output_lines.append( + "{} {}{}: {}".format( + comment_marker, checkprefix, check_suffix, func_line + ) + ) + is_blank_line = False + + # Add space between different check prefixes and also before the first + # line of code in the test function. + output_lines.append(comment_marker) + + # Remember new global variables we have not seen before + for key in global_vars_seen: + if key not in global_vars_seen_before: + global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] + break + return printed_prefixes + +def add_ir_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + preserve_names, + function_sig, + ginfo: GeneralizerInfo, + global_vars_seen_dict, + is_filtered, + original_check_lines={}, + generalize_calls=False, +): + assert ginfo.is_ir() + # Label format is based on IR string. 
+ if function_sig and ginfo.get_version() > 1: + function_def_regex = "define %s" + elif function_sig: + function_def_regex = "define {{[^@]+}}%s" + else: + function_def_regex = "%s" + check_label_format = "{} %s-LABEL: {}@%s%s%s".format( + comment_marker, function_def_regex + ) + return add_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + check_label_format, + ginfo, + global_vars_seen_dict, + is_filtered, + preserve_names, + original_check_lines=original_check_lines, + generalize_calls=generalize_calls, + ) + +def add_analyze_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + ginfo: GeneralizerInfo, + is_filtered, +): + assert ginfo.is_analyze() + check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker) + global_vars_seen_dict = {} + return add_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + check_label_format, + ginfo, + global_vars_seen_dict, + is_filtered, + ) + +def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo): + for nameless_value in ginfo.get_nameless_values(): + if nameless_value.global_ir_rhs_regexp is None: + continue - printed_prefixes.add((checkprefix, nameless_value.check_prefix)) + lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp + rhs_re_str = nameless_value.global_ir_rhs_regexp + + global_ir_value_re_str = r"^" + lhs_re_str + r"\s=\s" + rhs_re_str + r"$" + global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M)) + lines = [] + for m in global_ir_value_re.finditer(raw_tool_output): + # Attach the substring's start index so that CHECK lines + # can be sorted properly even if they are matched by different nameless values. + # This is relevant for GLOB and GLOBNAMED since they may appear interlaced. 
+ lines.append((m.start(), m.group(0))) + + for prefix in prefixes: + if glob_val_dict[prefix] is None: + continue + if nameless_value.check_prefix in glob_val_dict[prefix]: + if lines == glob_val_dict[prefix][nameless_value.check_prefix]: + continue + if prefix == prefixes[-1]: + warn("Found conflicting asm under the same prefix: %r!" % (prefix,)) + else: + glob_val_dict[prefix][nameless_value.check_prefix] = None + continue + glob_val_dict[prefix][nameless_value.check_prefix] = lines + +def filter_globals_according_to_preference( + global_val_lines_w_index, global_vars_seen, nameless_value, global_check_setting +): + if global_check_setting == "none": + return [] + if global_check_setting == "all": + return global_val_lines_w_index + assert global_check_setting == "smart" + + if nameless_value.check_key == "#": + # attribute sets are usually better checked by --check-attributes + return [] + + def extract(line, nv): + p = ( + "^" + + nv.ir_prefix + + "(" + + nv.ir_regexp + + ") = (" + + nv.global_ir_rhs_regexp + + ")" + ) + match = re.match(p, line) + return (match.group(1), re.findall(nv.ir_regexp, match.group(2))) + + transitively_visible = set() + contains_refs_to = {} + + def add(var): + nonlocal transitively_visible + nonlocal contains_refs_to + if var in transitively_visible: + return + transitively_visible.add(var) + if not var in contains_refs_to: + return + for x in contains_refs_to[var]: + add(x) + + for i, line in global_val_lines_w_index: + (var, refs) = extract(line, nameless_value) + contains_refs_to[var] = refs + for var, check_key in global_vars_seen: + if check_key != nameless_value.check_key: + continue + add(var) + return [ + (i, line) + for i, line in global_val_lines_w_index + if extract(line, nameless_value)[0] in transitively_visible + ] + +METADATA_FILTERS = [ + ( + r"(?<=\")(.+ )?(\w+ version )[\d.]+(?:[^\" ]*)(?: \([^)]+\))?", + r"{{.*}}\2{{.*}}", + ), # preface with glob also, to capture optional CLANG_VENDOR + (r'(!DIFile\(filename: 
".+", directory: )".+"', r"\1{{.*}}"), +] +METADATA_FILTERS_RE = [(re.compile(f), r) for (f, r) in METADATA_FILTERS] - # Remembe new global variables we have not seen before - for key in global_vars_seen: - if key not in global_vars_seen_before: - global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] - break +def filter_unstable_metadata(line): + for f, replacement in METADATA_FILTERS_RE: + line = f.sub(replacement, line) + return line - if printed_prefixes: +def flush_current_checks(output_lines, new_lines_w_index, comment_marker): + if not new_lines_w_index: + return output_lines.append(comment_marker + SEPARATOR) + new_lines_w_index.sort() + for _, line in new_lines_w_index: + output_lines.append(line) + new_lines_w_index.clear() + +def add_global_checks( + glob_val_dict, + comment_marker, + prefix_list, + output_lines, + ginfo: GeneralizerInfo, + global_vars_seen_dict, + preserve_names, + is_before_functions, + global_check_setting, +): + printed_prefixes = set() + output_lines_loc = {} # Allows GLOB and GLOBNAMED to be sorted correctly + for nameless_value in ginfo.get_nameless_values(): + if nameless_value.global_ir_rhs_regexp is None: + continue + if nameless_value.is_before_functions != is_before_functions: + continue + for p in prefix_list: + global_vars_seen = {} + checkprefixes = p[0] + if checkprefixes is None: + continue + for checkprefix in checkprefixes: + if checkprefix in global_vars_seen_dict: + global_vars_seen.update(global_vars_seen_dict[checkprefix]) + else: + global_vars_seen_dict[checkprefix] = {} + if (checkprefix, nameless_value.check_prefix) in printed_prefixes: + break + if not glob_val_dict[checkprefix]: + continue + if nameless_value.check_prefix not in glob_val_dict[checkprefix]: + continue + if not glob_val_dict[checkprefix][nameless_value.check_prefix]: + continue + + check_lines = [] + global_vars_seen_before = [key for key in global_vars_seen.keys()] + lines_w_index = 
glob_val_dict[checkprefix][nameless_value.check_prefix] + lines_w_index = filter_globals_according_to_preference( + lines_w_index, + global_vars_seen_before, + nameless_value, + global_check_setting, + ) + for i, line in lines_w_index: + if _global_value_regex: + matched = False + for regex in _global_value_regex: + if re.match("^@" + regex + " = ", line) or re.match( + "^!" + regex + " = ", line + ): + matched = True + break + if not matched: + continue + [new_line] = generalize_check_lines( + [line], + ginfo, + {}, + global_vars_seen, + preserve_names, + unstable_globals_only=True, + ) + new_line = filter_unstable_metadata(new_line) + check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line) + check_lines.append((i, check_line)) + if not check_lines: + continue + + if not checkprefix in output_lines_loc: + output_lines_loc[checkprefix] = [] + if not nameless_value.interlaced_with_previous: + flush_current_checks( + output_lines, output_lines_loc[checkprefix], comment_marker + ) + for check_line in check_lines: + output_lines_loc[checkprefix].append(check_line) + + printed_prefixes.add((checkprefix, nameless_value.check_prefix)) + + # Remembe new global variables we have not seen before + for key in global_vars_seen: + if key not in global_vars_seen_before: + global_vars_seen_dict[checkprefix][key] = global_vars_seen[key] + break + + if printed_prefixes: + for p in prefix_list: + if p[0] is None: + continue + for checkprefix in p[0]: + if checkprefix not in output_lines_loc: + continue + flush_current_checks( + output_lines, output_lines_loc[checkprefix], comment_marker + ) + break + output_lines.append(comment_marker + SEPARATOR) + return printed_prefixes def check_prefix(prefix): - if not PREFIX_RE.match(prefix): - hint = "" - if ',' in prefix: - hint = " Did you mean '--check-prefixes=" + prefix + "'?" - warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." 
+ hint) % - (prefix)) + if not PREFIX_RE.match(prefix): + hint = "" + if "," in prefix: + hint = " Did you mean '--check-prefixes=" + prefix + "'?" + warn( + ( + "Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + + hint + ) + % (prefix) + ) + +def get_check_prefixes(filecheck_cmd): + check_prefixes = [ + item + for m in CHECK_PREFIX_RE.finditer(filecheck_cmd) + for item in m.group(1).split(",") + ] + if not check_prefixes: + check_prefixes = ["CHECK"] + return check_prefixes def verify_filecheck_prefixes(fc_cmd): - fc_cmd_parts = fc_cmd.split() - for part in fc_cmd_parts: - if "check-prefix=" in part: - prefix = part.split('=', 1)[1] - check_prefix(prefix) - elif "check-prefixes=" in part: - prefixes = part.split('=', 1)[1].split(',') - for prefix in prefixes: - check_prefix(prefix) - if prefixes.count(prefix) > 1: - warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,)) + fc_cmd_parts = fc_cmd.split() + for part in fc_cmd_parts: + if "check-prefix=" in part: + prefix = part.split("=", 1)[1] + check_prefix(prefix) + elif "check-prefixes=" in part: + prefixes = part.split("=", 1)[1].split(",") + for prefix in prefixes: + check_prefix(prefix) + if prefixes.count(prefix) > 1: + warn( + "Supplied prefix '%s' is not unique in the prefix list." 
+ % (prefix,) + ) def get_autogennote_suffix(parser, args): - autogenerated_note_args = '' - for action in parser._actions: - if not hasattr(args, action.dest): - continue # Ignore options such as --help that aren't included in args - # Ignore parameters such as paths to the binary or the list of tests - if action.dest in ('tests', 'update_only', 'tool_binary', 'opt_binary', - 'llc_binary', 'clang', 'opt', 'llvm_bin', 'verbose'): - continue - value = getattr(args, action.dest) - if action.const is not None: # action stores a constant (usually True/False) - # Skip actions with different constant values (this happens with boolean - # --foo/--no-foo options) - if value != action.const: - continue - if parser.get_default(action.dest) == value: - continue # Don't add default values - if action.dest == 'filters': - # Create a separate option for each filter element. The value is a list - # of Filter objects. - for elem in value: - opt_name = 'filter-out' if elem.is_filter_out else 'filter' - opt_value = elem.pattern() - new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"')) - if new_arg not in autogenerated_note_args: - autogenerated_note_args += new_arg - else: - autogenerated_note_args += action.option_strings[0] + ' ' - if action.const is None: # action takes a parameter - if action.nargs == '+': - value = ' '.join(map(lambda v: '"' + v.strip('"') + '"', value)) - autogenerated_note_args += '%s ' % value - if autogenerated_note_args: - autogenerated_note_args = ' %s %s' % (UTC_ARGS_KEY, autogenerated_note_args[:-1]) - return autogenerated_note_args + autogenerated_note_args = "" + for action in parser._actions: + if not hasattr(args, action.dest): + continue # Ignore options such as --help that aren't included in args + # Ignore parameters such as paths to the binary or the list of tests + if action.dest in ( + "tests", + "update_only", + "tool_binary", + "opt_binary", + "llc_binary", + "clang", + "opt", + "llvm_bin", + "verbose", + "force_update", + 
"reset_variable_names", + "llvm_mc_binary", + ): + continue + value = getattr(args, action.dest) + if action.dest == "check_globals": + default_value = "none" if args.version < 4 else "smart" + if value == default_value: + continue + autogenerated_note_args += action.option_strings[0] + " " + if args.version < 4 and value == "all": + continue + autogenerated_note_args += "%s " % value + continue + if action.const is not None: # action stores a constant (usually True/False) + # Skip actions with different constant values (this happens with boolean + # --foo/--no-foo options) + if value != action.const: + continue + if parser.get_default(action.dest) == value: + continue # Don't add default values + if action.dest == "function_signature" and args.version >= 2: + continue # Enabled by default in version 2 + if action.dest == "filters": + # Create a separate option for each filter element. The value is a list + # of Filter objects. + for elem in value: + opt_name = "filter-out" if elem.is_filter_out else "filter" + opt_value = elem.pattern() + new_arg = '--%s "%s" ' % (opt_name, opt_value.strip('"')) + if new_arg not in autogenerated_note_args: + autogenerated_note_args += new_arg + else: + autogenerated_note_args += action.option_strings[0] + " " + if action.const is None: # action takes a parameter + if action.nargs == "+": + value = " ".join(map(lambda v: '"' + v.strip('"') + '"', value)) + autogenerated_note_args += "%s " % value + if autogenerated_note_args: + autogenerated_note_args = " %s %s" % ( + UTC_ARGS_KEY, + autogenerated_note_args[:-1], + ) + return autogenerated_note_args def check_for_command(line, parser, args, argv, argparse_callback): - cmd_m = UTC_ARGS_CMD.match(line) - if cmd_m: - for option in shlex.split(cmd_m.group('cmd').strip()): - if option: - argv.append(option) - args = parser.parse_args(filter(lambda arg: arg not in args.tests, argv)) - if argparse_callback is not None: - argparse_callback(args) - return args, argv + cmd_m = 
UTC_ARGS_CMD.match(line) + if cmd_m: + for option in shlex.split(cmd_m.group("cmd").strip()): + if option: + argv.append(option) + args = parse_args(parser, filter(lambda arg: arg not in args.tests, argv)) + if argparse_callback is not None: + argparse_callback(args) + return args, argv def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global): - result = get_arg_to_check(test_info.args) - if not result and is_global: - # See if this has been specified via UTC_ARGS. This is a "global" option - # that affects the entire generation of test checks. If it exists anywhere - # in the test, apply it to everything. - saw_line = False - for line_info in test_info.ro_iterlines(): - line = line_info.line - if not line.startswith(';') and line.strip() != '': - saw_line = True - result = get_arg_to_check(line_info.args) - if result: - if warn and saw_line: - # We saw the option after already reading some test input lines. - # Warn about it. - print('WARNING: Found {} in line following test start: '.format(arg_string) - + line, file=sys.stderr) - print('WARNING: Consider moving {} to top of file'.format(arg_string), - file=sys.stderr) - break - return result + result = get_arg_to_check(test_info.args) + if not result and is_global: + # See if this has been specified via UTC_ARGS. This is a "global" option + # that affects the entire generation of test checks. If it exists anywhere + # in the test, apply it to everything. + saw_line = False + for line_info in test_info.ro_iterlines(): + line = line_info.line + if not line.startswith(";") and line.strip() != "": + saw_line = True + result = get_arg_to_check(line_info.args) + if result: + if warn and saw_line: + # We saw the option after already reading some test input lines. + # Warn about it. 
+ print( + "WARNING: Found {} in line following test start: ".format( + arg_string + ) + + line, + file=sys.stderr, + ) + print( + "WARNING: Consider moving {} to top of file".format(arg_string), + file=sys.stderr, + ) + break + return result def dump_input_lines(output_lines, test_info, prefix_set, comment_string): - for input_line_info in test_info.iterlines(output_lines): - line = input_line_info.line - args = input_line_info.args - if line.strip() == comment_string: - continue - if line.strip() == comment_string + SEPARATOR: - continue - if line.lstrip().startswith(comment_string): - m = CHECK_RE.match(line) - if m and m.group(1) in prefix_set: - continue - output_lines.append(line.rstrip('\n')) - -def add_checks_at_end(output_lines, prefix_list, func_order, - comment_string, check_generator): - added = set() - generated_prefixes = set() - for prefix in prefix_list: - prefixes = prefix[0] - tool_args = prefix[1] - for prefix in prefixes: - for func in func_order[prefix]: - # The func order can contain the same functions multiple times. - # If we see one again we are done. - if (func, prefix) in added: + for input_line_info in test_info.iterlines(output_lines): + line = input_line_info.line + args = input_line_info.args + if line.strip() == comment_string: continue - if added: - output_lines.append(comment_string) - - # The add_*_checks routines expect a run list whose items are - # tuples that have a list of prefixes as their first element and - # tool command args string as their second element. They output - # checks for each prefix in the list of prefixes. By doing so, it - # implicitly assumes that for each function every run line will - # generate something for that function. That is not the case for - # generated functions as some run lines might not generate them - # (e.g. -fopenmp vs. no -fopenmp). - # - # Therefore, pass just the prefix we're interested in. 
This has - # the effect of generating all of the checks for functions of a - # single prefix before moving on to the next prefix. So checks - # are ordered by prefix instead of by function as in "normal" - # mode. - for generated_prefix in check_generator(output_lines, - [([prefix], tool_args)], func): - added.add((func, generated_prefix)) - generated_prefixes.add(generated_prefix) - return generated_prefixes + if line.strip() == comment_string + SEPARATOR: + continue + if line.lstrip().startswith(comment_string): + m = CHECK_RE.match(line) + if m and m.group(1) in prefix_set: + continue + output_lines.append(line.rstrip("\n")) + +def add_checks_at_end( + output_lines, prefix_list, func_order, comment_string, check_generator +): + added = set() + generated_prefixes = set() + for prefix in prefix_list: + prefixes = prefix[0] + tool_args = prefix[1] + for prefix in prefixes: + for func in func_order[prefix]: + # The func order can contain the same functions multiple times. + # If we see one again we are done. + if (func, prefix) in added: + continue + if added: + output_lines.append(comment_string) + + # The add_*_checks routines expect a run list whose items are + # tuples that have a list of prefixes as their first element and + # tool command args string as their second element. They output + # checks for each prefix in the list of prefixes. By doing so, it + # implicitly assumes that for each function every run line will + # generate something for that function. That is not the case for + # generated functions as some run lines might not generate them + # (e.g. -fopenmp vs. no -fopenmp). + # + # Therefore, pass just the prefix we're interested in. This has + # the effect of generating all of the checks for functions of a + # single prefix before moving on to the next prefix. So checks + # are ordered by prefix instead of by function as in "normal" + # mode. 
+ for generated_prefix in check_generator( + output_lines, [([prefix], tool_args)], func + ): + added.add((func, generated_prefix)) + generated_prefixes.add(generated_prefix) + return generated_prefixes diff --git a/tool/dumper/CMakeLists.txt b/tool/dumper/CMakeLists.txt index 03c0c75372..9cc9e9b623 100644 --- a/tool/dumper/CMakeLists.txt +++ b/tool/dumper/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -25,6 +25,8 @@ project(DUMPER C CXX) +set(LLPC_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") + ### Create DUMPER Library ################################################################################################ add_library(dumper_base OBJECT "") @@ -56,10 +58,10 @@ target_include_directories(dumper_base PUBLIC . 
PUBLIC - ${XGL_PAL_PATH}/inc/core - ${XGL_PAL_PATH}/inc/util + ${PAL_SOURCE_DIR}/inc/core + ${PAL_SOURCE_DIR}/inc/util ${LLVM_INCLUDE_DIRS} - ${XGL_VKGC_PATH}/util + ${LLPC_SOURCE_DIR}/util ) target_sources(dumper_base PRIVATE diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index c35345e9c1..e0586b8b9a 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -67,6 +67,7 @@ std::ostream &operator<<(std::ostream &out, VkProvokingVertexModeEXT provokingVe std::ostream &operator<<(std::ostream &out, ResourceLayoutScheme layout); std::ostream &operator<<(std::ostream &out, ThreadGroupSwizzleMode threadGroupSwizzleMode); std::ostream &operator<<(std::ostream &out, InvariantLoads invariants); +std::ostream &operator<<(std::ostream &out, LlvmScheduleStrategy strategy); template std::ostream &operator<<(std::ostream &out, ElfReader &reader); template raw_ostream &operator<<(raw_ostream &out, ElfReader &reader); @@ -716,6 +717,7 @@ void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo dumpFile << "options.viewIndexFromDeviceIndex = " << shaderInfo->options.viewIndexFromDeviceIndex << "\n"; dumpFile << "options.forceUnderflowPrevention = " << shaderInfo->options.forceUnderflowPrevention << "\n"; dumpFile << "options.forceMemoryBarrierScope = " << shaderInfo->options.forceMemoryBarrierScope << "\n"; + dumpFile << "options.scheduleStrategy = " << shaderInfo->options.scheduleStrategy << "\n"; dumpFile << "\n"; // clang-format on } @@ -854,6 +856,55 @@ void PipelineDumper::DumpFragmentOutputs(PipelineDumpFile *dumpFile, const uint8 dumpFile->dumpFile << "\n\n"; } +// ===================================================================================================================== +// Dump pm4crc hash. 
It hashes up to and including the first s_endpgm with an immediate 0 operand or 16KB +// +// @param dumpFile : Directory of pipeline dump +// @param gfxIp : Graphics IP version info +// @param pipelineBin : Pipeline binary (ELF) +void PipelineDumper::DumpPm4Crc(PipelineDumpFile *dumpFile, GfxIpVersion gfxIp, const BinaryData *pipelineBin) { + if (!dumpFile) + return; + ElfReader reader(gfxIp); + size_t readSize = 0; + if (reader.ReadFromBuffer(pipelineBin->pCode, &readSize) == Result::Success) { + unsigned sectionCount = reader.getSectionCount(); + bool sortSection = reader.getMap().size() == sectionCount; + for (unsigned idx = 0; idx < sectionCount; ++idx) { + typename ElfReader::SectionBuffer *section = nullptr; + Result result = Result::Success; + unsigned secIdx = idx; + if (sortSection) { + result = reader.getSectionDataBySortingIndex(idx, &secIdx, §ion); + } else { + result = reader.getSectionDataBySectionIndex(idx, §ion); + } + assert(result == Result::Success); + if (strcmp(section->name, TextName) == 0) { + // .text section + std::vector symbols; + reader.GetSymbolsBySectionIndex(secIdx, symbols); + dumpFile->dumpFile << "\n"; + for (auto sym : symbols) { + // S_ENDPGM hardware opcode value + static const uint32_t endPgm = (gfxIp.major == 11 || gfxIp.major == 12) ? 
0xBFB00000 : 0xBF810000; + // Hash up to 16KB + const unsigned codeSizeInbyte = std::min(sym.size, UINT64_C(16384)); + + const uint32_t *symCode = reinterpret_cast(voidPtrInc(section->data, sym.value)); + unsigned endPos = 0; + const unsigned codeSizeInDw = codeSizeInbyte / sizeof(uint32_t); + while (endPos < codeSizeInDw && symCode[endPos++] != endPgm) { + // nothing + } + auto crc = calculateCrc64(symCode, endPos * sizeof(uint32_t)); + dumpFile->dumpFile << ";" << sym.pSymName << "_pm4Crc = " << std::hex << crc << "\n"; + } + } + } + } +} + // ===================================================================================================================== // Dumps LLPC version info to file // @@ -960,9 +1011,11 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << glStatePrefix << "emulateWideLineStipple = " << options->getGlState().emulateWideLineStipple << "\n"; dumpFile << glStatePrefix << "enablePointSmooth = " << options->getGlState().enablePointSmooth << "\n"; dumpFile << glStatePrefix << "enableRemapLocation = " << options->getGlState().enableRemapLocation << "\n"; + dumpFile << glStatePrefix << "enableDepthCompareParam = " << options->getGlState().enableDepthCompareParam << "\n"; dumpFile << "options.enablePrimGeneratedQuery = " << options->enablePrimGeneratedQuery << "\n"; dumpFile << "options.disablePerCompFetch = " << options->disablePerCompFetch << "\n"; dumpFile << "options.optimizePointSizeWrite = " << options->optimizePointSizeWrite << "\n"; + dumpFile << "options.padBufferSizeToNextDword = " << options->padBufferSizeToNextDword << "\n"; // Output compile time constant info if (options->compileConstInfo) { @@ -2020,8 +2073,10 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options hasher->Update(options->getGlState().emulateWideLineStipple); hasher->Update(options->getGlState().enablePointSmooth); hasher->Update(options->getGlState().enableRemapLocation); + 
hasher->Update(options->getGlState().enableDepthCompareParam); // disablePerCompFetch has been handled in updateHashForNonFragmentState hasher->Update(options->optimizePointSizeWrite); + hasher->Update(options->padBufferSizeToNextDword); hasher->Update(options->compileConstInfo != nullptr); if (options->compileConstInfo != nullptr) { hasher->Update(options->compileConstInfo->numCompileTimeConstants); @@ -2126,6 +2181,7 @@ void PipelineDumper::updateHashForPipelineShaderInfo(ShaderStage stage, const Pi hasher->Update(options.viewIndexFromDeviceIndex); hasher->Update(options.forceUnderflowPrevention); hasher->Update(options.forceMemoryBarrierScope); + hasher->Update(options.scheduleStrategy); } } } @@ -2701,6 +2757,26 @@ std::ostream &operator<<(std::ostream &out, WaveBreakSize waveBreakSize) { return out << string; } +// ===================================================================================================================== +// Translates enum "LlvmScheduleStrategy" to string and output to ostream. +// +// @param [out] out : Output stream +// @param strategy: LLVM instruction schedule strategy +std::ostream &operator<<(std::ostream &out, LlvmScheduleStrategy strategy) { + const char *string = nullptr; + switch (strategy) { + CASE_CLASSENUM_TO_STRING(LlvmScheduleStrategy, None) + CASE_CLASSENUM_TO_STRING(LlvmScheduleStrategy, MaxIlp) + CASE_CLASSENUM_TO_STRING(LlvmScheduleStrategy, MaxMemoryClause) + break; + default: + llvm_unreachable("Should never be called!"); + break; + } + + return out << string; +} + // ===================================================================================================================== // Translates enum "ShadowDescriptorTableUsage" to string and output to ostream. 
// diff --git a/tool/dumper/vkgcPipelineDumper.h b/tool/dumper/vkgcPipelineDumper.h index aebe21832d..a3cd178eb9 100644 --- a/tool/dumper/vkgcPipelineDumper.h +++ b/tool/dumper/vkgcPipelineDumper.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -68,6 +68,8 @@ class PipelineDumper { static void DumpFragmentOutputs(PipelineDumpFile *dumpFile, const uint8_t *data, uint32_t size); + static void DumpPm4Crc(PipelineDumpFile *dumpFile, GfxIpVersion gfxIp, const BinaryData *pipelineBin); + static MetroHash::Hash generateHashForGraphicsPipeline(const GraphicsPipelineBuildInfo *pipeline, bool isCacheHash, UnlinkedShaderStage unlinkedShaderType = UnlinkedStageCount); diff --git a/tool/update_llpc_test_checks.py b/tool/update_llpc_test_checks.py index b800653262..2de0e2fac0 100755 --- a/tool/update_llpc_test_checks.py +++ b/tool/update_llpc_test_checks.py @@ -56,299 +56,450 @@ # PAL metadata support PAL_METADATA_RE = re.compile( - r'^[ \t]*\.amdgpu_pal_metadata\n' - r'---\n' - r'(?P.*?)' - r'^...\n', - flags=(re.MULTILINE | re.DOTALL)) + r"^[ \t]*\.amdgpu_pal_metadata\n" r"---\n" r"(?P.*?)" r"^...\n", + flags=(re.MULTILINE | re.DOTALL), +) -YAML_INDENT_RE = re.compile(r'(?P[ -]*)((?P[^:]+):)?') -YAML_SUFFIX_RE = re.compile(r':[^:]*$') +YAML_INDENT_RE = re.compile(r"(?P[ -]*)((?P[^:]+):)?") +YAML_SUFFIX_RE = re.compile(r":[^:]*$") -HEX_RE = re.compile(r'0x[0-9a-f]+') +HEX_RE = re.compile(r"0x[0-9a-f]+") def process_pal_metadata(pal_metadata_dict, prefixes, raw_tool_output): - m = PAL_METADATA_RE.search(raw_tool_output) - if not m: - common.warn(f"Did not find PAL metadata") - 
return - - metadata_in_lines = m.group("metadata").splitlines() - scrubbed_lines = [] - - scope = [] - def get_scope_path(): - return ''.join(name for name, _ in scope) - - for line in metadata_in_lines: - m = YAML_INDENT_RE.match(line) - indent = len(m.group("indent")) - scope = [(name, ind) for name, ind in scope if ind < indent] - - if m.group("name") is not None: - scope.append((m.group("name"), indent)) - - path = get_scope_path() - if 'hash' in path: - line = HEX_RE.subn("0x{{[0-9a-f]+}}", line)[0] - elif 'llpc_version' in path or 'PGM_CHKSUM' in path: - line = YAML_SUFFIX_RE.subn(": {{.*}}", line)[0] - - scrubbed_lines.append(line) - - metadata = '\n'.join(scrubbed_lines) - for prefix in prefixes: - if prefix not in pal_metadata_dict: - pal_metadata_dict[prefix] = metadata - else: - if pal_metadata_dict[prefix] != metadata: - pal_metadata_dict[prefix] = None - -def add_pal_metadata_checks(pal_metadata_dict, comment_prefix, prefix_list, - output_lines: List[str]): - written_prefixes = set() - for prefix_list_entry in prefix_list: - prefixes = prefix_list_entry[0] - for prefix in prefixes: - if prefix in pal_metadata_dict and pal_metadata_dict[prefix] is not None: - break - else: - common.warn(f"Did not find PAL metadata for prefix list {prefixes}") - return + m = PAL_METADATA_RE.search(raw_tool_output) + if not m: + common.warn(f"Did not find PAL metadata") + return + + metadata_in_lines = m.group("metadata").splitlines() + scrubbed_lines = [] + + scope = [] + + def get_scope_path(): + return "".join(name for name, _ in scope) - if prefix in written_prefixes: - continue + for line in metadata_in_lines: + m = YAML_INDENT_RE.match(line) + indent = len(m.group("indent")) + scope = [(name, ind) for name, ind in scope if ind < indent] - output_lines.append(comment_prefix) - output_lines.append(f'{comment_prefix} {prefix}-LABEL: .amdgpu_pal_metadata') - output_lines.append(f'{comment_prefix} {prefix}-NEXT: ---') - for line in pal_metadata_dict[prefix].splitlines(): 
- output_lines.append(f'{comment_prefix} {prefix}-NEXT: {line}') - output_lines.append(f'{comment_prefix} {prefix}-NEXT: ...') + if m.group("name") is not None: + scope.append((m.group("name"), indent)) - written_prefixes.add(prefix) + path = get_scope_path() + if "hash" in path: + line = HEX_RE.subn("0x{{[0-9a-f]+}}", line)[0] + elif "llpc_version" in path or "PGM_CHKSUM" in path: + line = YAML_SUFFIX_RE.subn(": {{.*}}", line)[0] + + scrubbed_lines.append(line) + + metadata = "\n".join(scrubbed_lines) + for prefix in prefixes: + if prefix not in pal_metadata_dict: + pal_metadata_dict[prefix] = metadata + else: + if pal_metadata_dict[prefix] != metadata: + pal_metadata_dict[prefix] = None + +def add_pal_metadata_checks( + pal_metadata_dict, comment_prefix, prefix_list, output_lines: List[str] +): + written_prefixes = set() + for prefix_list_entry in prefix_list: + prefixes = prefix_list_entry[0] + for prefix in prefixes: + if prefix in pal_metadata_dict and pal_metadata_dict[prefix] is not None: + break + else: + common.warn(f"Did not find PAL metadata for prefix list {prefixes}") + return + + if prefix in written_prefixes: + continue + + output_lines.append(comment_prefix) + output_lines.append(f"{comment_prefix} {prefix}-LABEL: .amdgpu_pal_metadata") + output_lines.append(f"{comment_prefix} {prefix}-NEXT: ---") + for line in pal_metadata_dict[prefix].splitlines(): + output_lines.append(f"{comment_prefix} {prefix}-NEXT: {line}") + output_lines.append(f"{comment_prefix} {prefix}-NEXT: ...") + + written_prefixes.add(prefix) ############################################################################### # Assembly support ASM_FUNCTION_AMDGPU_RE = re.compile( r'^_?(?P[^:\n]+):[ \t]*(?:;+[ \t]*@"?(?P=func)"?)?\n[^:]*?' 
- r'(?P.*?)\n' # (body of the function) + r"(?P.*?)\n" # (body of the function) # This list is incomplete - r'^\s*(\.Lfunc_end[0-9]+:\n|\.section)', - flags=(re.M | re.S)) + r"^\s*(\.Lfunc_end[0-9]+:\n|\.section)", + flags=(re.M | re.S), +) def scrub_asm_amdgpu(asm, *args): - # Scrub runs of whitespace out of the assembly, but leave the leading - # whitespace in place. - asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm) - # Expand the tabs used for indentation. - asm = str.expandtabs(asm, 2) - # Strip trailing whitespace. - asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) - return asm - -def add_asm_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, global_vars_seen_dict, args, is_filtered): - # Label format is based on ASM string. - check_label_format = '{} %s-LABEL: %s%s%s'.format(comment_marker) - return common.add_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, check_label_format, True, False, - global_vars_seen_dict, is_filtered=is_filtered) - -def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, - func_name, global_vars_seen_dict, args, is_filtered): - return common.add_ir_checks(output_lines, comment_marker, prefix_list, - func_dict, func_name, False, args.function_signature, - global_vars_seen_dict, is_filtered) + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = common.SCRUB_WHITESPACE_RE.sub(r" ", asm) + # Expand the tabs used for indentation. + asm = str.expandtabs(asm, 2) + # Strip trailing whitespace. + asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r"", asm) + return asm + +def add_asm_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + ginfo, + global_vars_seen_dict, + args, + is_filtered, + original_check_lines, +): + # Label format is based on ASM string. 
+ check_label_format = "{} %s-LABEL: %s%s%s%s".format(comment_marker) + return common.add_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + check_label_format, + ginfo, + global_vars_seen_dict, + is_filtered, + args.preserve_names, + original_check_lines, + ) + +def add_ir_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + ginfo, + global_vars_seen_dict, + args, + is_filtered, + original_check_lines, +): + return common.add_ir_checks( + output_lines, + comment_marker, + prefix_list, + func_dict, + func_name, + args.preserve_names, + args.function_signature, + ginfo, + global_vars_seen_dict, + is_filtered, + original_check_lines, + args.generalize_calls, + ) COMMENT_PREFIXES_BY_FILE_SUFFIX = { - '.pipe': ';', - '.spvasm': ';', - '.ll': ';', - '.lgc': ';', - # Everything else defaults to '//' + ".pipe": ";", + ".spvasm": ";", + ".ll": ";", + ".lgc": ";", + # Everything else defaults to '//' } def get_comment_prefix(test_name: str, input_lines): - ext = os.path.splitext(test_name)[1] - return COMMENT_PREFIXES_BY_FILE_SUFFIX.get(ext, '//') + ext = os.path.splitext(test_name)[1] + return COMMENT_PREFIXES_BY_FILE_SUFFIX.get(ext, "//") def main(): - from argparse import RawTextHelpFormatter - parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) - parser.add_argument('--tool', default='amdllpc', - help='The name of the tool used to generate the test case (defaults to "amdllpc")') - parser.add_argument('--tool-binary', - help='The tool binary used to generate the test case') - parser.add_argument('--function', help='Only update functions whose name matches the given regex') - parser.add_argument('-p', '--preserve-names', action='store_true', - help='Do not scrub IR names') - parser.add_argument('--function-signature', action='store_true', - help='Keep function signature information around for the check line') - parser.add_argument('--scrub-attributes', 
action='store_true', - help='Remove attribute annotations (#0) from the end of check line') - parser.add_argument('--check-attributes', action='store_true', - help='Check "Function Attributes" for functions') - parser.add_argument('--check-globals', action='store_true', - help='Check global entries (global variables, metadata, attribute sets, ...) for functions') - parser.add_argument('--check-pal-metadata', action='store_true', - help='Check PAL metadata in output assembly') - parser.add_argument('tests', nargs='+') - initial_args = common.parse_commandline_args(parser) - - if initial_args.tool_binary: - tool_basename = os.path.basename(initial_args.tool_binary) - if not re.match(r'^%s(-\d+)?(\.exe)?$' % (initial_args.tool), tool_basename): - common.error('Unexpected tool name: ' + tool_basename) - sys.exit(1) - - for ti in common.itertests(initial_args.tests, parser, 'tool/update_llpc_test_checks.py', - comment_prefix_callback=get_comment_prefix): - # If requested we scrub trailing attribute annotations, e.g., '#0', together with whitespaces - if ti.args.scrub_attributes: - common.SCRUB_TRAILING_WHITESPACE_TEST_RE = common.SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE - else: - common.SCRUB_TRAILING_WHITESPACE_TEST_RE = common.SCRUB_TRAILING_WHITESPACE_RE - - tool_basename = ti.args.tool - tool_binary = tool_basename - if tool_basename == initial_args.tool and initial_args.tool_binary: - tool_binary = initial_args.tool_binary - - prefix_list = [] - for l in ti.run_lines: - if '|' not in l: - common.warn('Skipping unparsable RUN line: ' + l) - continue - - commands = [cmd.strip() for cmd in l.split('|')] - assert len(commands) >= 2 - if len(commands) > 2: - common.error('Complex pipes are unsupported') - sys.exit(1) - tool_cmd = commands[-2] - filecheck_cmd = commands[-1] - common.verify_filecheck_prefixes(filecheck_cmd) - if not tool_cmd.startswith(tool_basename + ' '): - common.warn('Skipping non-%s RUN line: %s' % (tool_basename, l)) - continue - - if not 
filecheck_cmd.startswith('FileCheck '): - common.warn('Skipping non-FileChecked RUN line: ' + l) - continue - - tool_cmd_args = tool_cmd[len(tool_basename):].strip() - - check_prefixes = [item for m in - common.CHECK_PREFIX_RE.finditer(filecheck_cmd) - for item in m.group(1).split(',')] - if not check_prefixes: - check_prefixes = ['CHECK'] - - # FIXME: We should use multiple check prefixes to common check lines. For - # now, we just ignore all but the last. - prefix_list.append((check_prefixes, tool_cmd_args)) - - global_vars_seen_dict = {} - builder = common.FunctionTestBuilder( - run_list=prefix_list, - flags=ti.args, - scrubber_args=[], - path=ti.path) - - function_re = None - scrubber = None - add_checks = None - is_backend = None - - pal_metadata_dict = {} - - for prefixes, tool_args in prefix_list: - common.debug('Extracted tool cmd: ' + tool_basename + ' ' + tool_args) - common.debug('Extracted FileCheck prefixes: ' + str(prefixes)) - - raw_tool_output = common.invoke_tool_only(tool_binary, tool_args, ti.path, - verbose=ti.args.verbose) - - is_ir = common.OPT_FUNCTION_RE.search(raw_tool_output) is not None - if is_ir: - assert is_backend is None or is_backend == False # Currently can't support a mix of RUN lines - function_re = common.OPT_FUNCTION_RE - scrubber = common.scrub_body - add_checks = add_ir_checks - is_backend = False - else: - assert is_backend is None or is_backend == True # Currently can't support a mix of RUN lines - function_re = ASM_FUNCTION_AMDGPU_RE - scrubber = scrub_asm_amdgpu - add_checks = add_asm_checks - is_backend = True - - builder.process_run_line(function_re, scrubber, raw_tool_output, prefixes, - is_backend) - builder.processed_prefixes(prefixes) - - if ti.args.check_pal_metadata: - if not is_backend: - common.error(f"{ti.path}: --check-pal-metadata only applies with asm output") - sys.exit(1) - - process_pal_metadata(pal_metadata_dict, prefixes, raw_tool_output) - - func_dict = builder.finish_and_get_func_dict() - prefix_set = 
set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) - common.debug('Rewriting FileCheck prefixes:', str(prefix_set)) - output_lines = [] - - # Generate the appropriate checks for each function. We need to emit - # these in the order according to the generated output so that CHECK-LABEL - # works properly. func_order provides that. - - # We can't predict where various passes might insert functions so we can't - # be sure the input function order is maintained. Therefore, first spit - # out all the source lines. - common.dump_input_lines(output_lines, ti, prefix_set, ti.comment_prefix) - - args = ti.args - - if args.check_globals: - common.add_global_checks(builder.global_var_dict(), ti.comment_prefix, - prefix_list, output_lines, global_vars_seen_dict, - args.preserve_names, True) - - # Filter out functions - func_order = builder.func_order() - if ti.args.function: - filter_re = re.compile(ti.args.function) - new_func_order = {} - for prefix, func_names in func_order.items(): - new_func_order[prefix] = [ - func_name for func_name in func_names - if filter_re.search(func_name) - ] - func_order = new_func_order - - # Now generate all the checks. - common.add_checks_at_end(output_lines, prefix_list, func_order, - ti.comment_prefix, lambda my_output_lines, prefixes, func: - add_checks(my_output_lines, ti.comment_prefix, - prefixes, func_dict, func, - global_vars_seen_dict, args, - is_filtered=builder.is_filtered())) - - if args.check_globals: - common.add_global_checks(builder.global_var_dict(), ti.comment_prefix, - prefix_list, output_lines, global_vars_seen_dict, - args.preserve_names, False) - - if args.check_pal_metadata: - add_pal_metadata_checks(pal_metadata_dict, ti.comment_prefix, prefix_list, - output_lines) - - common.debug('Writing %d lines to %s...' 
% (len(output_lines), ti.path)) - - with open(ti.path, 'wb') as f: - f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) - -if __name__ == '__main__': - main() + from argparse import RawTextHelpFormatter + + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=RawTextHelpFormatter + ) + parser.add_argument( + "--tool", + default="amdllpc", + help='The name of the tool used to generate the test case (defaults to "amdllpc")', + ) + parser.add_argument( + "--tool-binary", help="The tool binary used to generate the test case" + ) + parser.add_argument( + "--function", help="Only update functions whose name matches the given regex" + ) + parser.add_argument( + "-p", "--preserve-names", action="store_true", help="Do not scrub IR names" + ) + parser.add_argument( + "--function-signature", + action="store_true", + help="Keep function signature information around for the check line", + ) + parser.add_argument( + "--scrub-attributes", + action="store_true", + help="Remove attribute annotations (#0) from the end of check line", + ) + parser.add_argument( + "--check-attributes", + action="store_true", + help='Check "Function Attributes" for functions', + ) + parser.add_argument( + "--check-globals", + nargs="?", + const="all", + default="default", + choices=["none", "smart", "all"], + help="Check global entries (global variables, metadata, attribute sets, ...) for functions", + ) + parser.add_argument( + "--check-pal-metadata", + action="store_true", + help="Check PAL metadata in output assembly", + ) + parser.add_argument( + "--reset-variable-names", + action="store_true", + help="Reset all variable names to correspond closely to the variable names in IR. 
" + "This tends to result in larger diffs.", + ) + parser.add_argument("tests", nargs="+") + initial_args = common.parse_commandline_args(parser) + + if initial_args.tool_binary: + tool_basename = os.path.basename(initial_args.tool_binary) + if not re.match(r"^%s(-\d+)?(\.exe)?$" % (initial_args.tool), tool_basename): + common.error("Unexpected tool name: " + tool_basename) + sys.exit(1) + + for ti in common.itertests( + initial_args.tests, + parser, + "tool/update_llpc_test_checks.py", + comment_prefix_callback=get_comment_prefix, + ): + # If requested we scrub trailing attribute annotations, e.g., '#0', together with whitespaces + if ti.args.scrub_attributes: + common.SCRUB_TRAILING_WHITESPACE_TEST_RE = ( + common.SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE + ) + else: + common.SCRUB_TRAILING_WHITESPACE_TEST_RE = ( + common.SCRUB_TRAILING_WHITESPACE_RE + ) + + tool_basename = ti.args.tool + tool_binary = tool_basename + if tool_basename == initial_args.tool and initial_args.tool_binary: + tool_binary = initial_args.tool_binary + + prefix_list = [] + for l in ti.run_lines: + if "|" not in l: + common.warn("Skipping unparsable RUN line: " + l) + continue + + commands = [cmd.strip() for cmd in l.split("|")] + assert len(commands) >= 2 + if len(commands) > 2: + common.error("Complex pipes are unsupported") + sys.exit(1) + tool_cmd = commands[-2] + filecheck_cmd = commands[-1] + common.verify_filecheck_prefixes(filecheck_cmd) + if not tool_cmd.startswith(tool_basename + " "): + common.warn("Skipping non-%s RUN line: %s" % (tool_basename, l)) + continue + + if not filecheck_cmd.startswith("FileCheck "): + common.warn("Skipping non-FileChecked RUN line: " + l) + continue + + tool_cmd_args = tool_cmd[len(tool_basename) :].strip() + + check_prefixes = [ + item + for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd) + for item in m.group(1).split(",") + ] + if not check_prefixes: + check_prefixes = ["CHECK"] + + # FIXME: We should use multiple check prefixes to common 
check lines. For + # now, we just ignore all but the last. + prefix_list.append((check_prefixes, tool_cmd_args)) + + global_vars_seen_dict = {} + + function_re = None + scrubber = None + add_checks = None + + pal_metadata_dict = {} + + for prefixes, tool_args in prefix_list: + common.debug("Extracted tool cmd: " + tool_basename + " " + tool_args) + common.debug("Extracted FileCheck prefixes: " + str(prefixes)) + + raw_tool_output = common.invoke_tool_only( + tool_binary, tool_args, ti.path, verbose=ti.args.verbose + ) + + is_ir = common.OPT_FUNCTION_RE.search(raw_tool_output) is not None + if is_ir: + function_re = common.OPT_FUNCTION_RE + scrubber = common.scrub_body + add_checks = add_ir_checks + ginfo = common.make_ir_generalizer(ti.args.version) + else: + function_re = ASM_FUNCTION_AMDGPU_RE + scrubber = scrub_asm_amdgpu + add_checks = add_asm_checks + ginfo = common.make_asm_generalizer(ti.args.version) + + builder = common.FunctionTestBuilder( + run_list=prefix_list, + flags=ti.args, + scrubber_args=[], + path=ti.path, + ginfo=ginfo, + ) + + builder.process_run_line(function_re, scrubber, raw_tool_output, prefixes) + builder.processed_prefixes(prefixes) + + if ti.args.check_pal_metadata: + if not ginfo.is_asm(): + common.error( + f"{ti.path}: --check-pal-metadata only applies with asm output" + ) + sys.exit(1) + + process_pal_metadata(pal_metadata_dict, prefixes, raw_tool_output) + + func_dict = builder.finish_and_get_func_dict() + prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) + + if not ti.args.reset_variable_names: + original_check_lines = common.collect_original_check_lines(ti, prefix_set) + else: + original_check_lines = {} + + common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) + output_lines = [] + + # Generate the appropriate checks for each function. We need to emit + # these in the order according to the generated output so that CHECK-LABEL + # works properly. func_order provides that. 
+ + # We can't predict where various passes might insert functions so we can't + # be sure the input function order is maintained. Therefore, first spit + # out all the source lines. + common.dump_input_lines(output_lines, ti, prefix_set, ti.comment_prefix) + + args = ti.args + + # Replace the meta variable containing the amdpal.pipelines and + # amdpal.version because it contains hashes that could change. + # Instead, use a regex containing "amdpal.pipelines{{.*}}amdpal.version" + global_var_dict = builder.global_var_dict() + for p in prefix_list: + checkprefixes = p[0] + for checkprefix in checkprefixes: + if "META" not in global_var_dict[checkprefix].keys(): + continue + + meta = global_var_dict[checkprefix]["META"] + # replace just the value containing amdpal.* and keep the other ones + meta = [ + ( + ( + x, + re.sub( + "(.*amdpal\.pipelines).*(amdpal\.version)", + "\\1{{.*}}\\2", + s, + ), + ) + if "amdpal" in s + else (x, s) + ) + for x, s in meta + ] + global_var_dict[checkprefix]["META"] = meta + + if args.check_globals != "none": + common.add_global_checks( + global_var_dict, + ti.comment_prefix, + prefix_list, + output_lines, + ginfo, + global_vars_seen_dict, + args.preserve_names, + True, + args.check_globals, + ) + + # Filter out functions + func_order = builder.func_order() + if ti.args.function: + filter_re = re.compile(ti.args.function) + new_func_order = {} + for prefix, func_names in func_order.items(): + new_func_order[prefix] = [ + func_name for func_name in func_names if filter_re.search(func_name) + ] + func_order = new_func_order + + # Now generate all the checks. 
+ common.add_checks_at_end( + output_lines, + prefix_list, + func_order, + ti.comment_prefix, + lambda my_output_lines, prefixes, func: add_checks( + my_output_lines, + ti.comment_prefix, + prefixes, + func_dict, + func, + ginfo, + global_vars_seen_dict, + args, + is_filtered=builder.is_filtered(), + original_check_lines=original_check_lines.get(func, {}), + ), + ) + + if args.check_globals != "none": + common.add_global_checks( + global_var_dict, + ti.comment_prefix, + prefix_list, + output_lines, + ginfo, + global_vars_seen_dict, + args.preserve_names, + False, + args.check_globals, + ) + + if args.check_pal_metadata: + add_pal_metadata_checks( + pal_metadata_dict, ti.comment_prefix, prefix_list, output_lines + ) + + common.debug("Writing %d lines to %s..." % (len(output_lines), ti.path)) + + with open(ti.path, "wb") as f: + f.writelines(["{}\n".format(l).encode("utf-8") for l in output_lines]) + +if __name__ == "__main__": + main() diff --git a/tool/vfx/vfxVkSection.cpp b/tool/vfx/vfxVkSection.cpp index c77403f378..47cda4603b 100644 --- a/tool/vfx/vfxVkSection.cpp +++ b/tool/vfx/vfxVkSection.cpp @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -123,6 +123,10 @@ class VkSectionParserInit { ADD_CLASS_ENUM_MAP(InvariantLoads, EnableOptimization) ADD_CLASS_ENUM_MAP(InvariantLoads, DisableOptimization) ADD_CLASS_ENUM_MAP(InvariantLoads, ClearInvariants) + + ADD_CLASS_ENUM_MAP(LlvmScheduleStrategy, None) + ADD_CLASS_ENUM_MAP(LlvmScheduleStrategy, MaxIlp) + ADD_CLASS_ENUM_MAP(LlvmScheduleStrategy, MaxMemoryClause) } }; diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index 48fe19d495..66302452b3 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -325,6 +325,7 @@ class SectionShaderOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, viewIndexFromDeviceIndex, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forceUnderflowPrevention, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forceMemoryBarrierScope, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, scheduleStrategy, MemberTypeEnum, false); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; @@ -526,6 +527,7 @@ class SectionGlState : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, emulateWideLineStipple, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enablePointSmooth, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enableRemapLocation, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enableDepthCompareParam, MemberTypeBool, false); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; @@ -594,6 +596,7 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enablePrimGeneratedQuery, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disablePerCompFetch, 
MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, optimizePointSizeWrite, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, padBufferSizeToNextDword, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionPipelineOption, m_compileTimeConstants, MemberTypeCompileConstInfo, true); return addrTableInitializer; }(); diff --git a/util/gpurtshim/CMakeLists.txt b/util/gpurtshim/CMakeLists.txt index f197174a12..7732a6e4c7 100644 --- a/util/gpurtshim/CMakeLists.txt +++ b/util/gpurtshim/CMakeLists.txt @@ -1,7 +1,7 @@ ## ####################################################################################################################### # - # Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + # Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to @@ -30,7 +30,7 @@ if(LLPC_RAY_TRACING AND NOT LLPC_IS_STANDALONE) add_library(vkgc_gpurtshim STATIC GpurtShim.cpp) include(../../cmake/CompilerFlags.cmake) - set_compiler_options(vkgc_gpurtshim ${LLPC_ENABLE_WERROR}) + set_compiler_options(vkgc_gpurtshim) # Link against vkgc_headers to pull in the necessary include directories and all the VKI_* defines target_link_libraries(vkgc_gpurtshim PUBLIC vkgc_headers) diff --git a/util/vkgcUtil.cpp b/util/vkgcUtil.cpp index c4b26d5e43..e45597090a 100644 --- a/util/vkgcUtil.cpp +++ b/util/vkgcUtil.cpp @@ -257,4 +257,60 @@ const char *getEntryPointNameFromSpirvBinary(const BinaryData *spvBin) { return entryName; } +// ===================================================================================================================== +// Calculate 64-bit CRC for the given block of data +// +// Returns 64-bit CRC compatible with CRC-64/XZ +// +// @param data : Pointer to the block of data +// @param size : Size of the data in bytes +// 
@param refin : Whether to reflect input +// @param refout : Whether to reflect result +uint64_t calculateCrc64(const void *data, size_t size, bool refin, bool refout) { + static constexpr uint64_t Poly = 0x42F0E1EBA9EA3693; + static constexpr uint64_t InitV = 0xFFFFFFFFFFFFFFFF; + static constexpr uint64_t XorOut = 0xFFFFFFFFFFFFFFFF; + + auto reflectByte = [](uint8_t b) { + b = (b & 0xF0) >> 4 | (b & 0x0F) << 4; + b = (b & 0xCC) >> 2 | (b & 0x33) << 2; + b = (b & 0xAA) >> 1 | (b & 0x55) << 1; + return b; + }; + + auto reflect64 = [](uint64_t value) { + uint64_t result = 0; + for (int i = 0; i < 64; ++i) { + if (value & (1ULL << i)) { + result |= 1ULL << (63 - i); + } + } + return result; + }; + + uint64_t crc = InitV; + const uint8_t *ptr = reinterpret_cast(data); + const uint8_t *end = ptr + size; + while (ptr < end) { + uint8_t byte = *ptr; + if (refin) { + byte = reflectByte(byte); + } + crc ^= static_cast(byte) << 56; + for (int i = 0; i < 8; ++i) { + if (crc & 0x8000000000000000) { + crc = (crc << 1) ^ Poly; + } else { + crc <<= 1; + } + } + ptr++; + } + + if (refout) { + crc = reflect64(crc); + } + return crc ^ XorOut; +} + } // namespace Vkgc diff --git a/util/vkgcUtil.h b/util/vkgcUtil.h index b50616f886..61a233e441 100644 --- a/util/vkgcUtil.h +++ b/util/vkgcUtil.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2020-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -73,6 +73,9 @@ inline void *voidPtrInc(const void *p, size_t numBytes) { return (static_cast(ptr) + numBytes); } +// Calculate 64-bit CRC for the given block of data +uint64_t calculateCrc64(const void *data, size_t size, bool refin = true, bool refout = true); + // =================================================================================== // Finds the expected structure in Vulkan structure chain with the specified info. template diff --git a/version/CMakeLists.txt b/version/CMakeLists.txt index 925eb8007c..350e7672ae 100644 --- a/version/CMakeLists.txt +++ b/version/CMakeLists.txt @@ -86,6 +86,9 @@ llpc_set_property(llpc_version INTERFACE LLPC_BUILD_GFX115 ON "HW_GFX115") #if LLPC_BUILD_STRIX1 llpc_set_property(llpc_version INTERFACE LLPC_BUILD_STRIX1 ON "HW_STRIX1") #endif +#if LLPC_BUILD_STRIX_HALO +llpc_set_property(llpc_version INTERFACE LLPC_BUILD_STRIX_HALO ON "HW_STRIX_HALO") +#endif # Report the summary of what is enabled. message(STATUS "llpc_version:${LLPC_SET_PROPERTY_SUMMARY_llpc_version}") diff --git a/version/include/llpc/GfxRuntimeCommon.hlsli b/version/include/llpc/GfxRuntimeCommon.hlsli new file mode 100644 index 0000000000..0cfaf736f3 --- /dev/null +++ b/version/include/llpc/GfxRuntimeCommon.hlsli @@ -0,0 +1,67 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file GfxRuntimeCommon.hlsli + * @brief Declare common utils that are used in internal runtime library + *********************************************************************************************************************** + */ +#pragma once + +// clang-format off + +#ifndef DUMMY_VOID_FUNC +#ifdef AMD_VULKAN +#define DUMMY_VOID_FUNC {} +#else // AMD_VULKAN +#define DUMMY_VOID_FUNC ; +#endif +#endif + +#ifndef DUMMY_GENERIC_FUNC +#ifdef AMD_VULKAN +#define DUMMY_GENERIC_FUNC(value) { return value; } +#else // AMD_VULKAN +#define DUMMY_GENERIC_FUNC(value) ; +#endif +#endif + +#ifndef GFX_RUNTIME_COMMON_INOUT +#ifdef __cplusplus +#define GFX_RUNTIME_COMMON_INOUT +#else +#define GFX_RUNTIME_COMMON_INOUT inout +#endif +#endif + +#ifndef GFX_RUNTIME_COMMON_DECL +#ifdef __cplusplus +#define GFX_RUNTIME_COMMON_DECL extern +#elif AMD_VULKAN +#define GFX_RUNTIME_COMMON_DECL [noinline] +#else +#define GFX_RUNTIME_COMMON_DECL +#endif +#endif diff --git a/version/include/llpc/GpurtEnums.h b/version/include/llpc/GpurtEnums.h index c8e4f61940..ca968f56c4 100644 --- a/version/include/llpc/GpurtEnums.h +++ b/version/include/llpc/GpurtEnums.h @@ -1,7 +1,7 @@ /* *********************************************************************************************************************** * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -64,3 +64,13 @@ enum class RayTracingIpLevel : uint32_t { RtIp1_1 = 11, RtIp2_0 = 20, }; + +// CPS Scheduling levels. 
+enum class CpsSchedulingLevel : uint32_t {
+  RayGen = 1,                         // numbering starts at 1, not 0
+  ClosestHit_Miss_Callable,           // 2 (implicit)
+  Traversal,                          // 3 (implicit)
+  AnyHit_CombinedIntersection_AnyHit, // 4 (implicit)
+  Intersection,                       // 5 (implicit)
+  Count,                              // 6: one past the last level; NOT the number of levels (5), as numbering starts at 1
+};
diff --git a/version/include/llpc/GpurtIntrinsics.h b/version/include/llpc/GpurtIntrinsics.h
index 0bb6cfe9c7..022fdddcce 100644
--- a/version/include/llpc/GpurtIntrinsics.h
+++ b/version/include/llpc/GpurtIntrinsics.h
@@ -30,37 +30,13 @@
  */
 #pragma once
 
+#include "llpc/GfxRuntimeCommon.hlsli"
 #include "llpc/GpurtEnums.h"
 
 // clang-format off
 
-#ifndef DUMMY_VOID_FUNC
-#ifdef AMD_VULKAN
-#define DUMMY_VOID_FUNC {}
-#else // AMD_VULKAN
-#define DUMMY_VOID_FUNC ;
-#endif
-#endif
-
-#ifndef DUMMY_GENERIC_FUNC
-#ifdef AMD_VULKAN
-#define DUMMY_GENERIC_FUNC(value) { return value; }
-#else // AMD_VULKAN
-#define DUMMY_GENERIC_FUNC(value) ;
-#endif
-#endif
-
-#ifdef __cplusplus
-#define GPURT_INOUT
-#define GPURT_DECL extern
-#else // __cplusplus
-#define GPURT_INOUT inout
-#ifdef AMD_VULKAN
-#define GPURT_DECL [noinline]
-#else // AMD_VULKAN
-#define GPURT_DECL
-#endif
-#endif
+#define GPURT_INOUT GFX_RUNTIME_COMMON_INOUT
+#define GPURT_DECL GFX_RUNTIME_COMMON_DECL
 
 //=====================================================================================================================
 // Continuation intrinsics
diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in
index b2d98d025a..0b64411a53 100644
--- a/version/include/llpcVersion.h.in
+++ b/version/include/llpcVersion.h.in
@@ -37,6 +37,8 @@
 // %Version History
 // | %Version | Change Description |
 // | -------- | ----------------------------------------------------------------------------------------------------- |
+// | 75.12| Add enableDepthCompareParam to PipelineOptions. |
+// | 75.11| Add scheduleStrategy to PipelineShaderOptions |
 // | 75.10| Add temporalHintShaderControl to PipelineShaderOptions |
 // | 75.9 | Add rtIgnoreDeclaredPayloadSize to RayTracingPipelineBuildInfo.
| // | 75.8 | Add forceMemoryBarrierScope to PipelineShaderOptions. | @@ -201,7 +203,7 @@ #define LLPC_INTERFACE_MAJOR_VERSION 75 /// LLPC minor interface version. -#define LLPC_INTERFACE_MINOR_VERSION 10 +#define LLPC_INTERFACE_MINOR_VERSION 11 /// The client's LLPC major interface version #ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION