// A simple compute shader example (Direct3D 11, C interface)
- #include <windows.h>
- #include <crtdbg.h>
- #include <d3dcommon.h>
- #include <d3d11.h>
- #include <d3dcompiler.h>
-
- #include <stdio.h>
- #include <stdint.h>
- #include <stdbool.h>
-
- #define NUM_ELEMENTS 2048
-
/* Element layout used for the structured buffers; compute.hlsl declares a BufType with the same two fields. */
struct BufType
{
    int i;
    float f;
};

/* CPU-side input arrays: main() fills them and uploads each one as a structured buffer. */
static struct BufType s_vBuf0[NUM_ELEMENTS];
static struct BufType s_vBuf1[NUM_ELEMENTS];
-
- static bool CreateComputeDevice(ID3D11Device** ppDeviceOut, ID3D11DeviceContext** ppContextOut)
- {
- *ppDeviceOut = NULL;
- *ppContextOut = NULL;
-
- const uint32_t uCreationFlags = D3D11_CREATE_DEVICE_SINGLETHREADED | D3D11_CREATE_DEVICE_DEBUG;
-
- D3D_FEATURE_LEVEL flOut;
- const D3D_FEATURE_LEVEL flvl[] = { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0 };
-
- bool result = D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, uCreationFlags, flvl,
- sizeof(flvl) / sizeof(D3D_FEATURE_LEVEL), D3D11_SDK_VERSION, ppDeviceOut, &flOut, ppContextOut) >= 0;
- if (result)
- printf("Currently use Direct3D level: %d.%d\n", flOut >> 12, (flOut >> 8) & 0xf);
-
- return result;
- }
-
- static bool CreateStructureBuffer(ID3D11Device* pDevice, uint32_t elementSize, uint32_t uCount,
- void* pInitData, ID3D11Buffer** ppBufferOut)
- {
- *ppBufferOut = NULL;
-
- D3D11_BUFFER_DESC desc;
- ZeroMemory(&desc, sizeof(desc));
- desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
- desc.ByteWidth = elementSize*uCount;
- desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
- desc.StructureByteStride = elementSize;
-
- if (pInitData != NULL)
- {
- D3D11_SUBRESOURCE_DATA InitData = { 0 };
- InitData.pSysMem = pInitData;
- return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, &InitData, ppBufferOut) >= 0;
- }
- else
- return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, NULL, ppBufferOut) >= 0;
- }
-
- static bool CreateConstantBuffer(ID3D11Device* pDevice, uint32_t nBytes, void* pInitData, ID3D11Buffer** ppBufferOut)
- {
- *ppBufferOut = NULL;
-
- D3D11_BUFFER_DESC desc;
- ZeroMemory(&desc, sizeof(desc));
- desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
- desc.ByteWidth = nBytes;
- desc.Usage = D3D11_USAGE_DYNAMIC;
- desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
-
- D3D11_SUBRESOURCE_DATA initData;
- initData.pSysMem = pInitData;
- initData.SysMemPitch = 0;
- initData.SysMemSlicePitch = 0;
-
- return pDevice->lpVtbl->CreateBuffer(pDevice, &desc, &initData, ppBufferOut) >= 0;
- }
-
- static bool CreateComputeShader(LPCWSTR pSrcFile, LPCSTR pFunctionName,
- ID3D11Device* pDevice, ID3D11ComputeShader** ppShaderOut)
- {
- uint32_t dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
-
- // Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders.
- // Setting this flag improves the shader debugging experience, but still allows
- // the shaders to be optimized and to run exactly the way they will run in
- // the release configuration of this program.
- dwShaderFlags |= D3DCOMPILE_DEBUG;
-
- const D3D_SHADER_MACRO defines[] =
- {
- "USE_STRUCTURED_BUFFERS", "1",
- NULL, NULL
- };
-
- // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware
- ID3DBlob* pErrorBlob = NULL;
- ID3DBlob* computeShader = NULL;
-
- if (D3DCompileFromFile(pSrcFile, defines, NULL, pFunctionName, "cs_5_0", dwShaderFlags, 0,
- &computeShader, &pErrorBlob) < 0)
- {
- if (pErrorBlob != NULL)
- OutputDebugStringA((char*)pErrorBlob->lpVtbl->GetBufferPointer(pErrorBlob));
-
- if(pErrorBlob != NULL)
- pErrorBlob->lpVtbl->Release(pErrorBlob);
- if(computeShader != NULL)
- computeShader->lpVtbl->Release(computeShader);
-
- return false;
- }
-
- bool result = true;
- if (pDevice->lpVtbl->CreateComputeShader(pDevice, computeShader->lpVtbl->GetBufferPointer(computeShader),
- computeShader->lpVtbl->GetBufferSize(computeShader), NULL, ppShaderOut))
- result = false;
-
- if (pErrorBlob != NULL)
- pErrorBlob->lpVtbl->Release(pErrorBlob);
- if (computeShader != NULL)
- computeShader->lpVtbl->Release(computeShader);
-
- return result;
- }
-
- /**
- 利用ID3D11Device::CreateShaderResouceView()来创建GPU中Buffer的resourceView
- */
- static bool CreateBufferSRV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11ShaderResourceView** ppSRVOut)
- {
- D3D11_BUFFER_DESC descBuf;
- ZeroMemory(&descBuf, sizeof(descBuf));
- pBuffer->lpVtbl->GetDesc(pBuffer, &descBuf);
-
- D3D11_SHADER_RESOURCE_VIEW_DESC desc;
- ZeroMemory(&desc, sizeof(desc));
- desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
- desc.BufferEx.FirstElement = 0;
-
- //假定这是个structure buffer
- desc.Format = DXGI_FORMAT_UNKNOWN;
- desc.BufferEx.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride;
-
- return pDevice->lpVtbl->CreateShaderResourceView(pDevice, (ID3D11Resource*)pBuffer, &desc, ppSRVOut) >= 0;
- }
-
- static bool CreateBufferUAV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11UnorderedAccessView** ppUAVOut)
- {
- D3D11_BUFFER_DESC descBuf;
- ZeroMemory(&descBuf, sizeof(descBuf));
- pBuffer->lpVtbl->GetDesc(pBuffer, &descBuf);
-
- D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
- ZeroMemory(&desc, sizeof(desc));
- desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
- desc.Buffer.FirstElement = 0;
-
- //假设这是一个structure buffer
- desc.Format = DXGI_FORMAT_UNKNOWN;
- desc.Buffer.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride;
-
- return pDevice->lpVtbl->CreateUnorderedAccessView(pDevice, (ID3D11Resource*)pBuffer, &desc, ppUAVOut) >= 0;
- }
-
- static void RunComputeShader(ID3D11DeviceContext* pImmediateContext, ID3D11ComputeShader* pComputeShader,
- uint32_t nSRVs, uint32_t nUAVs, ID3D11ShaderResourceView* pShaderResourceViews[],
- ID3D11UnorderedAccessView* pUnorderedViews[], uint32_t X, uint32_t Y, uint32_t Z)
- {
- pImmediateContext->lpVtbl->CSSetShader(pImmediateContext, pComputeShader, NULL, 0);
- pImmediateContext->lpVtbl->CSSetShaderResources(pImmediateContext, 0, nSRVs, pShaderResourceViews);
- pImmediateContext->lpVtbl->CSSetUnorderedAccessViews(pImmediateContext, 0, nUAVs, pUnorderedViews, NULL);
- pImmediateContext->lpVtbl->Dispatch(pImmediateContext, NUM_ELEMENTS, 1, 1);
-
- //清空Shader和各个Shader Resource View、Unordered Access View以及一些Constant Buffer
- pImmediateContext->lpVtbl->CSSetShader(pImmediateContext, NULL, NULL, 0);
-
- ID3D11UnorderedAccessView* ppUAViewNULL[] = { NULL, NULL };
- pImmediateContext->lpVtbl->CSSetUnorderedAccessViews(pImmediateContext, 0, 2, ppUAViewNULL, NULL);
-
- ID3D11ShaderResourceView* ppSRVNULL[2] = { NULL,NULL };
- pImmediateContext->lpVtbl->CSSetShaderResources(pImmediateContext, 0, 2, ppSRVNULL);
-
- ID3D11Buffer* ppCBNULL[1] = { NULL };
- pImmediateContext->lpVtbl->CSSetConstantBuffers(pImmediateContext, 0, 1, ppCBNULL);
- }
-
- static ID3D11Buffer* CreateAndCopyToDebugBuf(ID3D11Device* pDevice, ID3D11DeviceContext* pd3dImmediateContext,
cpp运行
- ID3D11Buffer* pBuffer)
- {
- ID3D11Buffer* debugBuf = NULL;
- D3D11_BUFFER_DESC desc;
- ZeroMemory(&desc, sizeof(desc));
- pBuffer->lpVtbl->GetDesc(pBuffer, &desc);
- desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
- desc.Usage = D3D11_USAGE_STAGING;
- desc.BindFlags = 0;
- desc.MiscFlags = 0;
-
- if (pDevice->lpVtbl->CreateBuffer(pDevice, &desc, NULL, &debugBuf) >= 0)
- {
- pd3dImmediateContext->lpVtbl->CopyResource(pd3dImmediateContext, (ID3D11Resource*)debugBuf,
- (ID3D11Resource*)pBuffer);
- }
- return debugBuf;
- }
-
- int main(void)
- {
- _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
-
- ID3D11Device *device = NULL;
- ID3D11DeviceContext *context = NULL;
- ID3D11ComputeShader *computeShader = NULL;
-
- //各个Buffer指针变量
- ID3D11Buffer *srcBuffer0 = NULL;
- ID3D11Buffer *srcBuffer1 = NULL;
- ID3D11Buffer *resultBuffer = NULL;
- ID3D11Buffer *srcDstBuffer = NULL;
- ID3D11Buffer *constBuffer = NULL;
-
- //读写上面buffer的ID3D11ShaderResourceView和UnorderedAccessView接口
- ID3D11ShaderResourceView *srcBuf0SRV = NULL;
- ID3D11ShaderResourceView *srcBuf1SRV = NULL;
- ID3D11UnorderedAccessView *resBufUAV = NULL;
- ID3D11UnorderedAccessView *srcdstBufUAV = NULL;
-
- int localBuffer[NUM_ELEMENTS];
- for (int i = 0; i < NUM_ELEMENTS; i++)
- localBuffer[i] = i + 1;
-
- do
- {
- if (!CreateComputeDevice(&device, &context))
- {
- puts("CreateComputeDevice failed!");
- break;
- }
-
- if (!CreateComputeShader(L"compute.hlsl", "CSMain", device, &computeShader))
- {
- puts("CreateComputeShader failed!");
- break;
- }
-
- //初始化计算数据
- for (int i = 0; i<NUM_ELEMENTS; i++)
- {
- s_vBuf0[i].i = i;
- s_vBuf0[i].f = (float)i;
-
- s_vBuf1[i].i = i;
- s_vBuf1[i].f = (float)i;
- }
-
- //为CPU中的数组创建GPU中相应Buffer
- if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, s_vBuf0, &srcBuffer0))
- {
- puts("create srcBuffer0 failed");
- break;
- }
-
- if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, s_vBuf1, &srcBuffer1))
- {
- puts("create srcBuffer1 failed");
- break;
- }
-
- if (!CreateStructureBuffer(device, sizeof(struct BufType), NUM_ELEMENTS, NULL, &resultBuffer))
- {
- puts("Create resultBuffer failed");
- break;
- }
-
- if (!CreateStructureBuffer(device, sizeof(localBuffer[0]), _countof(localBuffer), localBuffer, &srcDstBuffer))
- {
- puts("create srcDstBuffer failed");
- break;
- }
-
- // 在D3D11中,常量缓存至少需要4个int元素
- int value[4] = { 10, 20 };
- if (!CreateConstantBuffer(device, sizeof(value), value, &constBuffer))
- {
- puts("Create constBuffer failed!");
- break;
- }
- // 绑定常量缓存
- context->lpVtbl->CSSetConstantBuffers(context, 0, 1, &constBuffer);
-
- //为buffer创建相应的shader resource view与unordered access view
- if (!CreateBufferSRV(device, srcBuffer0, &srcBuf0SRV))
- {
- puts("create srcBuf0SRV failed");
- break;
- }
-
- if (!CreateBufferSRV(device, srcBuffer1, &srcBuf1SRV))
- {
- puts("create srcBuf1SRV failed");
- break;
- }
-
- if (!CreateBufferUAV(device, resultBuffer, &resBufUAV))
- {
- puts("create resBufUAV failed");
- break;
- }
-
- if (!CreateBufferUAV(device, srcDstBuffer, &srcdstBufUAV))
- {
- puts("create srcdstBufUAV failed!");
- break;
- }
-
- ID3D11ShaderResourceView* shaderResourceViews[] = { srcBuf0SRV, srcBuf1SRV };
- ID3D11UnorderedAccessView* unorderedAccessViews[] = { resBufUAV, srcdstBufUAV };
-
- //运行Shader Compute程序
- RunComputeShader(context, computeShader, _countof(shaderResourceViews), _countof(unorderedAccessViews),
- shaderResourceViews, unorderedAccessViews, NUM_ELEMENTS, 1, 1);
-
- //将GPU计算的结果写回CPU
- ID3D11Buffer* debugBuf = NULL;
- // 先查看resultBuffer中的内容
- debugBuf = CreateAndCopyToDebugBuf(device, context, resultBuffer);
- if (debugBuf == NULL)
- {
- puts("debugBuf create failed!");
- break;
- }
-
- D3D11_MAPPED_SUBRESOURCE mappedResource;
- context->lpVtbl->Map(context, (ID3D11Resource*)debugBuf, 0, D3D11_MAP_READ, 0, &mappedResource);
- struct BufType *p = mappedResource.pData;
-
- puts("Output GPU resultBuffer results, first ten:");
- for (int i = 0; i < 10; i++)
- printf("i: %d, f: %.1f\n", p[i].i, p[i].f);
-
- puts("last ten:");
- for(int i = NUM_ELEMENTS - 10; i < NUM_ELEMENTS; i++)
- printf("i: %d, f: %.1f\n", p[i].i, p[i].f);
-
- context->lpVtbl->Unmap(context, (ID3D11Resource*)debugBuf, 0);
- debugBuf->lpVtbl->Release(debugBuf);
-
- // 再查看srcdstBuffer中的内容
- debugBuf = CreateAndCopyToDebugBuf(device, context, srcDstBuffer);
- if (debugBuf == NULL)
- {
- puts("debugBuf create failed!");
- break;
- }
- context->lpVtbl->Map(context, (ID3D11Resource*)debugBuf, 0, D3D11_MAP_READ, 0, &mappedResource);
- int *q = mappedResource.pData;
-
- puts("Output GPU srcDstBuffer results, first ten:");
- for (int i = 0; i < 10; i++)
- printf("[%d] = %d\n", i, q[i]);
-
- puts("last ten:");
- for (int i = NUM_ELEMENTS - 10; i < NUM_ELEMENTS; i++)
- printf("[%d] = %d\n", i, q[i]);
-
- context->lpVtbl->Unmap(context, (ID3D11Resource*)debugBuf, 0);
- debugBuf->lpVtbl->Release(debugBuf);
- }
- while (false);
-
- //释放资源
- if (srcBuf0SRV != NULL)
- srcBuf0SRV->lpVtbl->Release(srcBuf0SRV);
-
- if (srcBuf1SRV != NULL)
- srcBuf1SRV->lpVtbl->Release(srcBuf1SRV);
-
- if (resBufUAV != NULL)
- resBufUAV->lpVtbl->Release(resBufUAV);
-
- if (srcdstBufUAV != NULL)
- srcdstBufUAV->lpVtbl->Release(srcdstBufUAV);
-
- if (srcBuffer0 != NULL)
- srcBuffer0->lpVtbl->Release(srcBuffer0);
-
- if (srcBuffer1 != NULL)
- srcBuffer1->lpVtbl->Release(srcBuffer1);
-
- if (resultBuffer != NULL)
- resultBuffer->lpVtbl->Release(resultBuffer);
-
- if (srcDstBuffer != NULL)
- srcDstBuffer->lpVtbl->Release(srcDstBuffer);
-
- if (computeShader != NULL)
- computeShader->lpVtbl->Release(computeShader);
-
- if (context != NULL)
- context->lpVtbl->Release(context);
-
- if (device != NULL)
- device->lpVtbl->Release(device);
-
- puts("\nInput enter to exit...");
- getchar();
- }
// ---- compute.hlsl: the HLSL source that main() compiles with entry point "CSMain" ----
// Compute shader program.

// One element of the structured buffers: an int and a float.
struct BufType
{
    int   i;
    float f;
};
-
// Matches the constant buffer that the host binds to slot b0.
cbuffer cbNeverChanges : register(b0)
{
    int cValue0;
    int cValue1;
};
-
// Read-only inputs: match the host-side shader resource views (slots t0, t1).
StructuredBuffer<BufType> buffer0 : register(t0);
StructuredBuffer<BufType> buffer1 : register(t1);

// Read/write buffers: match the host-side unordered access views (slots u0, u1).
RWStructuredBuffer<BufType> bufferOut    : register(u0);
RWStructuredBuffer<int>     srcdstBuffer : register(u1);
-
// In Direct3D a thread group may contain at most 1024 threads.
// Each thread handles the element selected by the x component of its dispatch thread ID:
//   bufferOut[n].i  = (buffer0[n].i + buffer1[n].i) * (cValue1 / cValue0) - srcdstBuffer[n]
//   bufferOut[n].f  = (buffer0[n].f + buffer1[n].f) * (cValue1 / cValue0)
//   srcdstBuffer[n] = bufferOut[n].i
[numthreads(1024, 1, 1)]
void CSMain(uint3 groupID : SV_GroupID, uint3 tid : SV_DispatchThreadID,
            uint3 localTID : SV_GroupThreadID, uint gIdx : SV_GroupIndex)
{
    const int n = tid.x;
    const int scale = cValue1 / cValue0;

    const BufType lhs = buffer0[n];
    const BufType rhs = buffer1[n];

    // Integer result: scaled sum minus the value currently stored in srcdstBuffer.
    const int combined = (lhs.i + rhs.i) * scale - srcdstBuffer[n];

    bufferOut[n].i = combined;
    bufferOut[n].f = (lhs.f + rhs.f) * float(scale);

    // Write the integer result back in place.
    srcdstBuffer[n] = combined;
}
cpp运行
武汉格发信息技术有限公司,格发许可优化管理系统可以帮你评估贵公司软件许可的真实需求,再低成本合规性管理软件许可,帮助贵司提高软件投资回报率,为软件采购、使用提供科学决策依据。支持的软件有: CAD,CAE,PDM,PLM,Catia,Ugnx, AutoCAD, Pro/E, Solidworks ,Hyperworks, Protel,CAXA,OpenWorks LandMark,MATLAB,Enovia,Winchill,TeamCenter,MathCAD,Ansys, Abaqus,ls-dyna, Fluent, MSC,Bentley,License,UG,ug,catia,Dassault Systèmes,AutoDesk,Altair,autocad,PTC,SolidWorks,Ansys,Siemens PLM Software,Paradigm,Mathworks,Borland,AVEVA,ESRI,hP,Solibri,Progman,Leica,Cadence,IBM,SIMULIA,Citrix,Sybase,Schlumberger,MSC Products...