添加HLSLcc为非子模块
This commit is contained in:
parent
e9849ec7bc
commit
2cc7cf4f32
73
third_party/HLSLcc/.editorconfig
vendored
Normal file
73
third_party/HLSLcc/.editorconfig
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
# see http://editorconfig.org/ for docs on this file
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
# help with sharing files across os's (i.e. network share or through local vm)
|
||||
end_of_line = lf
|
||||
#charset temporarily disabled due to bug in VS2017 changing to UTF-8 with BOM (https://favro.com/card/c564ede4ed3337f7b17986b6/Uni-17877)
|
||||
#charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
# formattable file extensions (keep in sync with format.ini from unity-meta repo)
|
||||
#
|
||||
# Note: We need to split the formattable files configs into shorter duplicate entries (logically grouped)
|
||||
# due to known issue in VS editorconfig extension where there is a limit of 51 characters (empirically determined).
|
||||
# see: https://github.com/editorconfig/editorconfig-visualstudio/issues/21
|
||||
#
|
||||
## uncrustify
|
||||
[*.{c,h,cpp,hpp,m,mm,cc,cs}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
## generic formatter (shaders)
|
||||
[*.{cg,cginc,glslinc,hlsl,shader,y,ypp,yy}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
## generic formatter (misc)
|
||||
[*.{asm,s,S,pch,pchmm,java,sh,uss}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
## perltidy
|
||||
[*.{pl,pm,t,it}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
## unity special
|
||||
[*.{bindings,mem.xml}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
|
||||
# other filetypes we want to overwrite default configuration to preserve the standard
|
||||
[{Makefile,makefile}]
|
||||
# TAB characters are part of the Makefile format
|
||||
indent_style = tab
|
||||
|
||||
[*.{md,markdown}]
|
||||
# trailing whitespace is significant in markdown (bad choice, bad!)
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
# keep these and the VS stuff below in sync with .hgeol's CRLF extensions
|
||||
[*.{vcproj,bat,cmd,xaml,tt,t4,ttinclude}]
|
||||
end_of_line = crlf
|
||||
|
||||
# this VS-specific stuff is based on experiments to see how VS will modify a file after it has been manually edited.
|
||||
# the settings are meant to closely match what VS does to minimize unnecessary diffs. this duplicates some settings in *
|
||||
# but let's be explicit here to be safe (in case someone wants to copy-paste this out to another .editorconfig).
|
||||
[*.{vcxproj,vcxproj.filters,csproj,props,targets}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
end_of_line = crlf
|
||||
charset = utf-8-bom
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = false
|
||||
[*.{sln,sln.template}]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
end_of_line = crlf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = false
|
29
third_party/HLSLcc/.gitignore
vendored
Normal file
29
third_party/HLSLcc/.gitignore
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
# Compiled Object files
|
||||
*.slo
|
||||
*.lo
|
||||
*.o
|
||||
*.obj
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
|
||||
# Fortran module files
|
||||
*.mod
|
||||
*.smod
|
||||
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.a
|
||||
*.lib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
51
third_party/HLSLcc/CMakeLists.txt
vendored
Normal file
51
third_party/HLSLcc/CMakeLists.txt
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)

project(HLSLcc)

# The sources are written against C++11. Make the standard a hard requirement
# so CMake fails at configure time instead of silently decaying to an older
# dialect on a compiler that cannot provide it.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

option(HLSLCC_LIBRARY_SHARED "Build shared library instead of static." ON)

file(GLOB HLSLCC_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/*")
set(HLSLCC_SRC
    src/ControlFlowGraph.cpp
    src/ControlFlowGraphUtils.cpp
    src/DataTypeAnalysis.cpp
    src/Declaration.cpp
    src/decode.cpp
    src/HLSLcc.cpp
    src/HLSLccToolkit.cpp
    src/HLSLCrossCompilerContext.cpp
    src/Instruction.cpp
    src/LoopTransform.cpp
    src/Operand.cpp
    src/reflect.cpp
    src/Shader.cpp
    src/ShaderInfo.cpp
    src/toGLSL.cpp
    src/toGLSLDeclaration.cpp
    src/toGLSLInstruction.cpp
    src/toGLSLOperand.cpp
    src/toMetal.cpp
    src/toMetalDeclaration.cpp
    src/toMetalInstruction.cpp
    src/toMetalOperand.cpp
    src/UseDefineChains.cpp
    src/cbstring/bsafe.c
    src/cbstring/bstraux.c
    src/cbstring/bstrlib.c)

if(HLSLCC_LIBRARY_SHARED)
    add_library(${PROJECT_NAME} SHARED ${HLSLCC_SRC})

    # include/hlslcc.h only expands HLSLCC_API to __declspec(dllexport/dllimport)
    # when HLSLCC_DYNLIB is defined, and picks dllexport only when
    # libHLSLcc_EXPORTS is defined. CMake's automatic export symbol for this
    # target would be HLSLcc_EXPORTS, which the header does not look at, so both
    # macros are provided explicitly:
    #   - HLSLCC_DYNLIB is PUBLIC so consumers of the target see dllimport.
    #   - libHLSLcc_EXPORTS is PRIVATE so only the library itself sees dllexport.
    # The header ignores both macros on non-Windows platforms.
    target_compile_definitions(${PROJECT_NAME}
        PUBLIC
            HLSLCC_DYNLIB
        PRIVATE
            libHLSLcc_EXPORTS)
else()
    add_library(${PROJECT_NAME} STATIC ${HLSLCC_SRC})
endif()

target_include_directories(${PROJECT_NAME}
    PUBLIC
        ${CMAKE_CURRENT_SOURCE_DIR}
        ${CMAKE_CURRENT_SOURCE_DIR}/include
    PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/src
        ${CMAKE_CURRENT_SOURCE_DIR}/src/cbstring
        ${CMAKE_CURRENT_SOURCE_DIR}/src/internal_includes)
|
54
third_party/HLSLcc/README.md
vendored
Normal file
54
third_party/HLSLcc/README.md
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
# HLSLcc
|
||||
DirectX shader bytecode cross compiler.
|
||||
|
||||
Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
|
||||
|
||||
This library takes DirectX bytecode as input, and translates it into the following languages:
|
||||
- GLSL (OpenGL 3.2 and later)
|
||||
- GLSL ES (OpenGL ES 2.0 and later)
|
||||
- GLSL for Vulkan consumption (as input for Glslang to generate SPIR-V)
|
||||
- Metal Shading Language
|
||||
|
||||
This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
|
||||
|
||||
Changes from original HLSLCrossCompiler:
|
||||
- Codebase changed to C++11, with major code reorganizations.
|
||||
- Support for multiple language output backends (currently ToGLSL and ToMetal)
|
||||
- Metal language output support
|
||||
- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts).
|
||||
- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form
|
||||
- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers.
|
||||
- Reflection interface to retrieve the shader inputs and their types.
|
||||
- Lots of workarounds for various driver/shader compiler bugs.
|
||||
- Lots of minor fixes and improvements for correctness
|
||||
- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
|
||||
|
||||
## Note
|
||||
|
||||
This project is originally integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile `src/*.cpp` (in C++11 mode!) and `src/cbstring/*.c` with the following include paths:
|
||||
|
||||
- include
|
||||
- src/internal_includes
|
||||
- src/cbstring
|
||||
- src
|
||||
|
||||
Alternatively, a CMakeLists.txt is provided to build the project using cmake.
|
||||
|
||||
The main entry point is the TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input).
|
||||
|
||||
|
||||
## Contributors
|
||||
- Mikko Strandborg
|
||||
- Juho Oravainen
|
||||
- David Rogers
|
||||
- Marton Ekler
|
||||
- Antti Tapaninen
|
||||
- Florian Penzkofer
|
||||
- Alexey Orlov
|
||||
- Povilas Kanapickas
|
||||
- Aleksandr Kirillov
|
||||
- Kay Chang
|
||||
|
||||
## License
|
||||
|
||||
MIT license for HLSLcc itself, BSD license for the bstring library. See license.txt.
|
510
third_party/HLSLcc/include/ShaderInfo.h
vendored
Normal file
510
third_party/HLSLcc/include/ShaderInfo.h
vendored
Normal file
@ -0,0 +1,510 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "growing_array.h"
|
||||
#include <stdint.h>
|
||||
|
||||
//Reflection
|
||||
#define MAX_RESOURCE_BINDINGS 256
|
||||
|
||||
typedef enum _SHADER_VARIABLE_TYPE
|
||||
{
|
||||
SVT_VOID = 0,
|
||||
SVT_BOOL = 1,
|
||||
SVT_INT = 2,
|
||||
SVT_FLOAT = 3,
|
||||
SVT_STRING = 4,
|
||||
SVT_TEXTURE = 5,
|
||||
SVT_TEXTURE1D = 6,
|
||||
SVT_TEXTURE2D = 7,
|
||||
SVT_TEXTURE3D = 8,
|
||||
SVT_TEXTURECUBE = 9,
|
||||
SVT_SAMPLER = 10,
|
||||
SVT_PIXELSHADER = 15,
|
||||
SVT_VERTEXSHADER = 16,
|
||||
SVT_UINT = 19,
|
||||
SVT_UINT8 = 20,
|
||||
SVT_GEOMETRYSHADER = 21,
|
||||
SVT_RASTERIZER = 22,
|
||||
SVT_DEPTHSTENCIL = 23,
|
||||
SVT_BLEND = 24,
|
||||
SVT_BUFFER = 25,
|
||||
SVT_CBUFFER = 26,
|
||||
SVT_TBUFFER = 27,
|
||||
SVT_TEXTURE1DARRAY = 28,
|
||||
SVT_TEXTURE2DARRAY = 29,
|
||||
SVT_RENDERTARGETVIEW = 30,
|
||||
SVT_DEPTHSTENCILVIEW = 31,
|
||||
SVT_TEXTURE2DMS = 32,
|
||||
SVT_TEXTURE2DMSARRAY = 33,
|
||||
SVT_TEXTURECUBEARRAY = 34,
|
||||
SVT_HULLSHADER = 35,
|
||||
SVT_DOMAINSHADER = 36,
|
||||
SVT_INTERFACE_POINTER = 37,
|
||||
SVT_COMPUTESHADER = 38,
|
||||
SVT_DOUBLE = 39,
|
||||
SVT_RWTEXTURE1D = 40,
|
||||
SVT_RWTEXTURE1DARRAY = 41,
|
||||
SVT_RWTEXTURE2D = 42,
|
||||
SVT_RWTEXTURE2DARRAY = 43,
|
||||
SVT_RWTEXTURE3D = 44,
|
||||
SVT_RWBUFFER = 45,
|
||||
SVT_BYTEADDRESS_BUFFER = 46,
|
||||
SVT_RWBYTEADDRESS_BUFFER = 47,
|
||||
SVT_STRUCTURED_BUFFER = 48,
|
||||
SVT_RWSTRUCTURED_BUFFER = 49,
|
||||
SVT_APPEND_STRUCTURED_BUFFER = 50,
|
||||
SVT_CONSUME_STRUCTURED_BUFFER = 51,
|
||||
|
||||
|
||||
// Only used as a marker when analyzing register types
|
||||
SVT_FORCED_INT = 152,
|
||||
// Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis
|
||||
SVT_INT_AMBIGUOUS = 153,
|
||||
|
||||
// Partial precision types. Used when doing type analysis
|
||||
SVT_FLOAT10 = 53, // Seems to be used in constant buffers
|
||||
SVT_FLOAT16 = 54,
|
||||
SVT_INT16 = 156,
|
||||
SVT_INT12 = 157,
|
||||
SVT_UINT16 = 158,
|
||||
|
||||
SVT_FORCE_DWORD = 0x7fffffff
|
||||
} SHADER_VARIABLE_TYPE;
|
||||
|
||||
typedef enum _SHADER_VARIABLE_CLASS
|
||||
{
|
||||
SVC_SCALAR = 0,
|
||||
SVC_VECTOR = (SVC_SCALAR + 1),
|
||||
SVC_MATRIX_ROWS = (SVC_VECTOR + 1),
|
||||
SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1),
|
||||
SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1),
|
||||
SVC_STRUCT = (SVC_OBJECT + 1),
|
||||
SVC_INTERFACE_CLASS = (SVC_STRUCT + 1),
|
||||
SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1),
|
||||
SVC_FORCE_DWORD = 0x7fffffff
|
||||
} SHADER_VARIABLE_CLASS;
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Types
|
||||
|
||||
enum TESSELLATOR_PARTITIONING
|
||||
{
|
||||
TESSELLATOR_PARTITIONING_UNDEFINED = 0,
|
||||
TESSELLATOR_PARTITIONING_INTEGER = 1,
|
||||
TESSELLATOR_PARTITIONING_POW2 = 2,
|
||||
TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
|
||||
TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
|
||||
};
|
||||
|
||||
enum TESSELLATOR_OUTPUT_PRIMITIVE
|
||||
{
|
||||
TESSELLATOR_OUTPUT_UNDEFINED = 0,
|
||||
TESSELLATOR_OUTPUT_POINT = 1,
|
||||
TESSELLATOR_OUTPUT_LINE = 2,
|
||||
TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
|
||||
TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
|
||||
};
|
||||
|
||||
typedef enum TESSELLATOR_DOMAIN
|
||||
{
|
||||
TESSELLATOR_DOMAIN_UNDEFINED = 0,
|
||||
TESSELLATOR_DOMAIN_ISOLINE = 1,
|
||||
TESSELLATOR_DOMAIN_TRI = 2,
|
||||
TESSELLATOR_DOMAIN_QUAD = 3
|
||||
} TESSELLATOR_DOMAIN;
|
||||
|
||||
enum SPECIAL_NAME
|
||||
{
|
||||
NAME_UNDEFINED = 0,
|
||||
NAME_POSITION = 1,
|
||||
NAME_CLIP_DISTANCE = 2,
|
||||
NAME_CULL_DISTANCE = 3,
|
||||
NAME_RENDER_TARGET_ARRAY_INDEX = 4,
|
||||
NAME_VIEWPORT_ARRAY_INDEX = 5,
|
||||
NAME_VERTEX_ID = 6,
|
||||
NAME_PRIMITIVE_ID = 7,
|
||||
NAME_INSTANCE_ID = 8,
|
||||
NAME_IS_FRONT_FACE = 9,
|
||||
NAME_SAMPLE_INDEX = 10,
|
||||
// The following are added for D3D11
|
||||
NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11,
|
||||
NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12,
|
||||
NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13,
|
||||
NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14,
|
||||
NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15,
|
||||
NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16,
|
||||
NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17,
|
||||
NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18,
|
||||
NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19,
|
||||
NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20,
|
||||
NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
|
||||
NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
|
||||
};
|
||||
|
||||
|
||||
enum INOUT_COMPONENT_TYPE
|
||||
{
|
||||
INOUT_COMPONENT_UNKNOWN = 0,
|
||||
INOUT_COMPONENT_UINT32 = 1,
|
||||
INOUT_COMPONENT_SINT32 = 2,
|
||||
INOUT_COMPONENT_FLOAT32 = 3
|
||||
};
|
||||
|
||||
enum MIN_PRECISION
|
||||
{
|
||||
MIN_PRECISION_DEFAULT = 0,
|
||||
MIN_PRECISION_FLOAT_16 = 1,
|
||||
MIN_PRECISION_FLOAT_2_8 = 2,
|
||||
MIN_PRECISION_RESERVED = 3,
|
||||
MIN_PRECISION_SINT_16 = 4,
|
||||
MIN_PRECISION_UINT_16 = 5,
|
||||
MIN_PRECISION_ANY_16 = 0xf0,
|
||||
MIN_PRECISION_ANY_10 = 0xf1
|
||||
};
|
||||
|
||||
enum ResourceType
|
||||
{
|
||||
RTYPE_CBUFFER,//0
|
||||
RTYPE_TBUFFER,//1
|
||||
RTYPE_TEXTURE,//2
|
||||
RTYPE_SAMPLER,//3
|
||||
RTYPE_UAV_RWTYPED,//4
|
||||
RTYPE_STRUCTURED,//5
|
||||
RTYPE_UAV_RWSTRUCTURED,//6
|
||||
RTYPE_BYTEADDRESS,//7
|
||||
RTYPE_UAV_RWBYTEADDRESS,//8
|
||||
RTYPE_UAV_APPEND_STRUCTURED,//9
|
||||
RTYPE_UAV_CONSUME_STRUCTURED,//10
|
||||
RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11
|
||||
RTYPE_COUNT,
|
||||
};
|
||||
|
||||
enum ResourceGroup
|
||||
{
|
||||
RGROUP_CBUFFER,
|
||||
RGROUP_TEXTURE,
|
||||
RGROUP_SAMPLER,
|
||||
RGROUP_UAV,
|
||||
RGROUP_COUNT,
|
||||
};
|
||||
|
||||
enum REFLECT_RESOURCE_DIMENSION
|
||||
{
|
||||
REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0,
|
||||
REFLECT_RESOURCE_DIMENSION_BUFFER = 1,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9,
|
||||
REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
|
||||
REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11,
|
||||
};
|
||||
|
||||
enum REFLECT_RESOURCE_PRECISION
|
||||
{
|
||||
REFLECT_RESOURCE_PRECISION_UNKNOWN = 0,
|
||||
REFLECT_RESOURCE_PRECISION_LOWP = 1,
|
||||
REFLECT_RESOURCE_PRECISION_MEDIUMP = 2,
|
||||
REFLECT_RESOURCE_PRECISION_HIGHP = 3,
|
||||
};
|
||||
|
||||
enum RESOURCE_RETURN_TYPE
|
||||
{
|
||||
RETURN_TYPE_UNORM = 1,
|
||||
RETURN_TYPE_SNORM = 2,
|
||||
RETURN_TYPE_SINT = 3,
|
||||
RETURN_TYPE_UINT = 4,
|
||||
RETURN_TYPE_FLOAT = 5,
|
||||
RETURN_TYPE_MIXED = 6,
|
||||
RETURN_TYPE_DOUBLE = 7,
|
||||
RETURN_TYPE_CONTINUED = 8,
|
||||
RETURN_TYPE_UNUSED = 9,
|
||||
};
|
||||
|
||||
typedef std::map<std::string, REFLECT_RESOURCE_PRECISION> HLSLccSamplerPrecisionInfo;
|
||||
|
||||
struct ResourceBinding
|
||||
{
|
||||
std::string name;
|
||||
ResourceType eType;
|
||||
uint32_t ui32BindPoint;
|
||||
uint32_t ui32BindCount;
|
||||
uint32_t ui32Flags;
|
||||
uint32_t ui32Space;
|
||||
uint32_t ui32RangeID;
|
||||
REFLECT_RESOURCE_DIMENSION eDimension;
|
||||
RESOURCE_RETURN_TYPE ui32ReturnType;
|
||||
uint32_t ui32NumSamples;
|
||||
REFLECT_RESOURCE_PRECISION ePrecision;
|
||||
int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with
|
||||
|
||||
SHADER_VARIABLE_TYPE GetDataType() const
|
||||
{
|
||||
switch (ePrecision)
|
||||
{
|
||||
case REFLECT_RESOURCE_PRECISION_LOWP:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT10;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT16;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT16;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT10;
|
||||
}
|
||||
|
||||
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT16;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT16;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT16;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT16;
|
||||
}
|
||||
|
||||
default:
|
||||
switch (ui32ReturnType)
|
||||
{
|
||||
case RETURN_TYPE_UNORM:
|
||||
case RETURN_TYPE_SNORM:
|
||||
case RETURN_TYPE_FLOAT:
|
||||
return SVT_FLOAT;
|
||||
case RETURN_TYPE_SINT:
|
||||
return SVT_INT;
|
||||
case RETURN_TYPE_UINT:
|
||||
return SVT_UINT;
|
||||
case RETURN_TYPE_DOUBLE:
|
||||
return SVT_DOUBLE;
|
||||
default:
|
||||
// ASSERT(0);
|
||||
return SVT_FLOAT;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderVarType
|
||||
{
|
||||
ShaderVarType() :
|
||||
Class(),
|
||||
Type(),
|
||||
Rows(),
|
||||
Columns(),
|
||||
Elements(),
|
||||
MemberCount(),
|
||||
Offset(),
|
||||
ParentCount(),
|
||||
Parent(),
|
||||
m_IsUsed(false)
|
||||
{}
|
||||
|
||||
SHADER_VARIABLE_CLASS Class;
|
||||
SHADER_VARIABLE_TYPE Type;
|
||||
uint32_t Rows;
|
||||
uint32_t Columns;
|
||||
uint32_t Elements;
|
||||
uint32_t MemberCount;
|
||||
uint32_t Offset;
|
||||
std::string name;
|
||||
|
||||
uint32_t ParentCount;
|
||||
struct ShaderVarType * Parent;
|
||||
//Includes all parent names.
|
||||
std::string fullName;
|
||||
|
||||
std::vector<struct ShaderVarType> Members;
|
||||
|
||||
bool m_IsUsed; // If not set, is not used in the shader code
|
||||
|
||||
uint32_t GetMemberCount() const
|
||||
{
|
||||
if (Class == SVC_STRUCT)
|
||||
{
|
||||
uint32_t res = 0;
|
||||
std::vector<struct ShaderVarType>::const_iterator itr;
|
||||
for (itr = Members.begin(); itr != Members.end(); itr++)
|
||||
{
|
||||
res += itr->GetMemberCount();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
struct ShaderVar
|
||||
{
|
||||
std::string name;
|
||||
int haveDefaultValue;
|
||||
std::vector<uint32_t> pui32DefaultValues;
|
||||
//Offset/Size in bytes.
|
||||
uint32_t ui32StartOffset;
|
||||
uint32_t ui32Size;
|
||||
|
||||
ShaderVarType sType;
|
||||
};
|
||||
|
||||
struct ConstantBuffer
|
||||
{
|
||||
std::string name;
|
||||
|
||||
std::vector<ShaderVar> asVars;
|
||||
|
||||
uint32_t ui32TotalSizeInBytes;
|
||||
|
||||
uint32_t GetMemberCount(bool stripUnused) const
|
||||
{
|
||||
uint32_t res = 0;
|
||||
std::vector<ShaderVar>::const_iterator itr;
|
||||
for (itr = asVars.begin(); itr != asVars.end(); itr++)
|
||||
{
|
||||
if (stripUnused && !itr->sType.m_IsUsed)
|
||||
continue;
|
||||
res += itr->sType.GetMemberCount();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
struct ClassType
|
||||
{
|
||||
std::string name;
|
||||
uint16_t ui16ID;
|
||||
uint16_t ui16ConstBufStride;
|
||||
uint16_t ui16Texture;
|
||||
uint16_t ui16Sampler;
|
||||
};
|
||||
|
||||
struct ClassInstance
|
||||
{
|
||||
std::string name;
|
||||
uint16_t ui16ID;
|
||||
uint16_t ui16ConstBuf;
|
||||
uint16_t ui16ConstBufOffset;
|
||||
uint16_t ui16Texture;
|
||||
uint16_t ui16Sampler;
|
||||
};
|
||||
|
||||
class Operand;
|
||||
|
||||
class ShaderInfo
|
||||
{
|
||||
public:
|
||||
|
||||
struct InOutSignature
|
||||
{
|
||||
std::string semanticName;
|
||||
uint32_t ui32SemanticIndex;
|
||||
SPECIAL_NAME eSystemValueType;
|
||||
INOUT_COMPONENT_TYPE eComponentType;
|
||||
uint32_t ui32Register;
|
||||
uint32_t ui32Mask;
|
||||
uint32_t ui32ReadWriteMask;
|
||||
|
||||
int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle
|
||||
|
||||
uint32_t ui32Stream;
|
||||
MIN_PRECISION eMinPrec;
|
||||
|
||||
std::set<uint32_t> isIndexed; // Set of phases where this input/output is part of a index range.
|
||||
std::map<uint32_t, uint32_t> indexStart; // If indexed, contains the start index for the range
|
||||
std::map<uint32_t, uint32_t> index; // If indexed, contains the current index relative to the index start.
|
||||
};
|
||||
|
||||
ShaderInfo() :
|
||||
ui32MajorVersion(),
|
||||
ui32MinorVersion(),
|
||||
psResourceBindings(),
|
||||
psConstantBuffers(),
|
||||
psThisPointerConstBuffer(),
|
||||
psClassTypes(),
|
||||
psClassInstances()
|
||||
{}
|
||||
|
||||
SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo);
|
||||
|
||||
int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const;
|
||||
|
||||
void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const;
|
||||
|
||||
int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const;
|
||||
|
||||
int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
|
||||
int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
|
||||
int GetOutputSignatureFromRegister(const uint32_t ui32Register,
|
||||
const uint32_t ui32CompMask,
|
||||
const uint32_t ui32Stream,
|
||||
const InOutSignature** ppsOut,
|
||||
bool allowNull = false) const;
|
||||
|
||||
int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const;
|
||||
|
||||
static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
|
||||
|
||||
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false);
|
||||
|
||||
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
|
||||
const uint32_t(&pui32Swizzle)[4],
|
||||
const ConstantBuffer* psCBuf,
|
||||
const ShaderVarType** ppsShaderVar,
|
||||
bool* isArray,
|
||||
std::vector<uint32_t>* arrayIndices,
|
||||
int32_t* pi32Rebase,
|
||||
uint32_t flags);
|
||||
|
||||
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
|
||||
|
||||
// Apply shader precision information to resource bindings
|
||||
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
|
||||
|
||||
uint32_t ui32MajorVersion;
|
||||
uint32_t ui32MinorVersion;
|
||||
|
||||
std::vector<InOutSignature> psInputSignatures;
|
||||
std::vector<InOutSignature> psOutputSignatures;
|
||||
std::vector<InOutSignature> psPatchConstantSignatures;
|
||||
|
||||
std::vector<ResourceBinding> psResourceBindings;
|
||||
|
||||
std::vector<ConstantBuffer> psConstantBuffers;
|
||||
ConstantBuffer* psThisPointerConstBuffer;
|
||||
|
||||
std::vector<ClassType> psClassTypes;
|
||||
std::vector<ClassInstance> psClassInstances;
|
||||
|
||||
//Func table ID to class name ID.
|
||||
HLSLcc::growing_vector<uint32_t> aui32TableIDToTypeID;
|
||||
|
||||
HLSLcc::growing_vector<uint32_t> aui32ResourceMap[RGROUP_COUNT];
|
||||
|
||||
HLSLcc::growing_vector<ShaderVarType> sGroupSharedVarType;
|
||||
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
uint32_t ui32TessInputControlPointCount;
|
||||
uint32_t ui32TessOutputControlPointCount;
|
||||
TESSELLATOR_DOMAIN eTessDomain;
|
||||
bool bEarlyFragmentTests;
|
||||
};
|
23
third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h
vendored
Normal file
23
third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro.
|
||||
|
||||
#include <string>
|
||||
#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE"
|
||||
#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform"
|
||||
|
||||
const unsigned int kArraySizeConstantID = 0;
|
||||
const unsigned int kPreTransformConstantID = 1;
|
||||
|
||||
// TODO: share with Runtime/GfxDevice/InstancingUtilities.h
// Returns true when cbName starts with the prefix "UnityInstancing"
// (case-sensitive; anything may follow the prefix).
// A null cbName yields false rather than being handed to strncmp, where it
// would be undefined behavior.
inline bool IsUnityInstancingConstantBufferName(const char* cbName)
{
    static const char kInstancedCbNamePrefix[] = "UnityInstancing";
    if (!cbName)
        return false;
    // sizeof counts the terminating NUL, hence the -1 to compare only the prefix characters.
    return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0;
}
|
||||
|
||||
// Returns true when cbName starts with the prefix
// "UnityDisplayOrientationPreTransformData" (case-sensitive; anything may
// follow the prefix).
// A null cbName yields false rather than being handed to strncmp, where it
// would be undefined behavior.
inline bool IsPreTransformConstantBufferName(const char* cbName)
{
    static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData";
    if (!cbName)
        return false;
    // sizeof counts the terminating NUL, hence the -1 to compare only the prefix characters.
    return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0;
}
|
45
third_party/HLSLcc/include/growing_array.h
vendored
Normal file
45
third_party/HLSLcc/include/growing_array.h
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
|
||||
namespace HLSLcc
{
    // A vector that automatically grows when written to, fills the intermediate ones with default value.
    // Reading from an index returns the default value if attempting to access out of bounds.
    template<class T> class growing_vector
    {
    public:
        growing_vector() : data() {}

        std::vector<T> data;

        // Mutable access. An out-of-bounds index grows the storage to
        // (idx + 1) * 2 elements, value-initializing the new ones, so
        // references obtained earlier may be invalidated by this call.
        T & operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2);
            return data[idx];
        }

        // Read-only access. Never grows the storage: an out-of-bounds index
        // returns a reference to a shared value-initialized T instead.
        const T & operator[](std::size_t idx) const
        {
            // const so the shared fallback can never be modified through this class.
            static const T defaultValue = T();
            if (idx >= data.size())
                return defaultValue;
            return data[idx];
        }
    };

    // Same but with bool specialization
    template<> class growing_vector<bool>
    {
    public:
        growing_vector() : data() {}

        std::vector<bool> data;

        // Mutable access. Returns the std::vector<bool> proxy reference; an
        // out-of-bounds index grows the storage to (idx + 1) * 2 elements,
        // filling the new ones with false.
        std::vector<bool>::reference operator[](std::size_t idx)
        {
            if (idx >= data.size())
                data.resize((idx + 1) * 2, false);
            return data[idx];
        }

        // Read-only access, matching the primary template: never grows the
        // storage and yields false for an out-of-bounds index. Returned by
        // value because std::vector<bool> has no real element references.
        bool operator[](std::size_t idx) const
        {
            if (idx >= data.size())
                return false;
            return data[idx];
        }
    };
}
|
816
third_party/HLSLcc/include/hlslcc.h
vendored
Normal file
816
third_party/HLSLcc/include/hlslcc.h
vendored
Normal file
@ -0,0 +1,816 @@
|
||||
#ifndef HLSLCC_H_
|
||||
#define HLSLCC_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
#if defined(_WIN32) && defined(HLSLCC_DYNLIB)
|
||||
#define HLSLCC_APIENTRY __stdcall
|
||||
#if defined(libHLSLcc_EXPORTS)
|
||||
#define HLSLCC_API __declspec(dllexport)
|
||||
#else
|
||||
#define HLSLCC_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define HLSLCC_APIENTRY
|
||||
#define HLSLCC_API
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef enum
|
||||
{
|
||||
LANG_DEFAULT,// Depends on the HLSL shader model.
|
||||
LANG_ES_100, LANG_ES_FIRST = LANG_ES_100,
|
||||
LANG_ES_300,
|
||||
LANG_ES_310, LANG_ES_LAST = LANG_ES_310,
|
||||
LANG_120, LANG_GL_FIRST = LANG_120,
|
||||
LANG_130,
|
||||
LANG_140,
|
||||
LANG_150,
|
||||
LANG_330,
|
||||
LANG_400,
|
||||
LANG_410,
|
||||
LANG_420,
|
||||
LANG_430,
|
||||
LANG_440, LANG_GL_LAST = LANG_440,
|
||||
LANG_METAL,
|
||||
} GLLang;
|
||||
|
||||
typedef struct GlExtensions
|
||||
{
|
||||
uint32_t ARB_explicit_attrib_location : 1;
|
||||
uint32_t ARB_explicit_uniform_location : 1;
|
||||
uint32_t ARB_shading_language_420pack : 1;
|
||||
uint32_t OVR_multiview : 1;
|
||||
uint32_t EXT_shader_framebuffer_fetch : 1;
|
||||
} GlExtensions;
|
||||
|
||||
#include "ShaderInfo.h"
|
||||
#include "UnityInstancingFlexibleArraySize.h"
|
||||
|
||||
typedef std::vector<std::string> TextureSamplerPairs;
|
||||
|
||||
typedef enum INTERPOLATION_MODE
|
||||
{
|
||||
INTERPOLATION_UNDEFINED = 0,
|
||||
INTERPOLATION_CONSTANT = 1,
|
||||
INTERPOLATION_LINEAR = 2,
|
||||
INTERPOLATION_LINEAR_CENTROID = 3,
|
||||
INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
|
||||
INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
|
||||
INTERPOLATION_LINEAR_SAMPLE = 6,
|
||||
INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7,
|
||||
} INTERPOLATION_MODE;
|
||||
|
||||
#define PS_FLAG_VERTEX_SHADER 0x1
|
||||
#define PS_FLAG_HULL_SHADER 0x2
|
||||
#define PS_FLAG_DOMAIN_SHADER 0x4
|
||||
#define PS_FLAG_GEOMETRY_SHADER 0x8
|
||||
#define PS_FLAG_PIXEL_SHADER 0x10
|
||||
|
||||
#define TO_FLAG_NONE 0x0
|
||||
#define TO_FLAG_INTEGER 0x1
|
||||
#define TO_FLAG_NAME_ONLY 0x2
|
||||
#define TO_FLAG_DECLARATION_NAME 0x4
|
||||
#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment.
|
||||
#define TO_FLAG_UNSIGNED_INTEGER 0x10
|
||||
#define TO_FLAG_DOUBLE 0x20
|
||||
// --- TO_AUTO_BITCAST_TO_FLOAT ---
|
||||
//If the operand is an integer temp variable then this flag
|
||||
//indicates that the temp has a valid floating point encoding
|
||||
//and that the current expression expects the operand to be floating point
|
||||
//and therefore intBitsToFloat must be applied to that variable.
|
||||
#define TO_AUTO_BITCAST_TO_FLOAT 0x40
|
||||
#define TO_AUTO_BITCAST_TO_INT 0x80
|
||||
#define TO_AUTO_BITCAST_TO_UINT 0x100
|
||||
// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
|
||||
// to match HLSL functionality.
|
||||
#define TO_AUTO_EXPAND_TO_VEC2 0x200
|
||||
#define TO_AUTO_EXPAND_TO_VEC3 0x400
|
||||
#define TO_AUTO_EXPAND_TO_VEC4 0x800
|
||||
#define TO_FLAG_BOOL 0x1000
|
||||
// These flags are only used for Metal:
|
||||
// Force downscaling of the operand to match
|
||||
// the other operand (Metal doesn't like mixing halfs with floats)
|
||||
#define TO_FLAG_FORCE_HALF 0x2000
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INVALID_SHADER = -1,
|
||||
PIXEL_SHADER,
|
||||
VERTEX_SHADER,
|
||||
GEOMETRY_SHADER,
|
||||
HULL_SHADER,
|
||||
DOMAIN_SHADER,
|
||||
COMPUTE_SHADER,
|
||||
} SHADER_TYPE;
|
||||
|
||||
// Enum for texture dimension reflection data
|
||||
typedef enum
|
||||
{
|
||||
TD_FLOAT = 0,
|
||||
TD_INT,
|
||||
TD_2D,
|
||||
TD_3D,
|
||||
TD_CUBE,
|
||||
TD_2DSHADOW,
|
||||
TD_2DARRAY,
|
||||
TD_CUBEARRAY
|
||||
} HLSLCC_TEX_DIMENSION;
|
||||
|
||||
// The prefix for all temporary variables used by the generated code.
|
||||
// Using a texture or uniform name like this will cause conflicts
|
||||
#define HLSLCC_TEMP_PREFIX "u_xlat"
|
||||
|
||||
typedef std::vector<std::pair<std::string, std::string> > MemberDefinitions;
|
||||
|
||||
// We store struct definition contents inside a vector of strings
|
||||
struct StructDefinition
|
||||
{
|
||||
StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {}
|
||||
|
||||
MemberDefinitions m_Members; // A vector of strings with the struct members
|
||||
std::vector<std::string> m_Dependencies; // A vector of struct names this struct depends on.
|
||||
bool m_IsPrinted; // Has this struct been printed out yet?
|
||||
};
|
||||
|
||||
typedef std::map<std::string, StructDefinition> StructDefinitions;
|
||||
|
||||
// Map of extra function definitions we need to add before the shader body but after the declarations.
|
||||
typedef std::map<std::string, std::string> FunctionDefinitions;
|
||||
|
||||
// A helper class for allocating binding slots
|
||||
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
|
||||
class BindingSlotAllocator
|
||||
{
|
||||
typedef std::map<uint32_t, uint32_t> SlotMap;
|
||||
SlotMap m_Allocations;
|
||||
uint32_t m_ShaderStageAllocations;
|
||||
public:
|
||||
BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0)
|
||||
{
|
||||
for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--)
|
||||
m_FreeSlots.push_back(i);
|
||||
}
|
||||
|
||||
enum BindType
|
||||
{
|
||||
ConstantBuffer = 0,
|
||||
RWBuffer,
|
||||
Texture,
|
||||
UAV
|
||||
};
|
||||
|
||||
uint32_t GetBindingSlot(uint32_t regNo, BindType type)
|
||||
{
|
||||
// The key is regNumber with the bindtype stored to highest 16 bits
|
||||
uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16);
|
||||
SlotMap::iterator itr = m_Allocations.find(key);
|
||||
if (itr == m_Allocations.end())
|
||||
{
|
||||
uint32_t slot = m_FreeSlots.back();
|
||||
m_FreeSlots.pop_back();
|
||||
m_Allocations.insert(std::make_pair(key, slot));
|
||||
return slot;
|
||||
}
|
||||
return itr->second;
|
||||
}
|
||||
|
||||
// Func for reserving binding slots with the original reg number.
|
||||
// Used for fragment shader UAVs (SetRandomWriteTarget etc).
|
||||
void ReserveBindingSlot(uint32_t regNo, BindType type)
|
||||
{
|
||||
uint32_t key = regNo | (uint32_t(type) << 16);
|
||||
m_Allocations.insert(std::make_pair(key, regNo));
|
||||
|
||||
// Remove regNo from free slots
|
||||
for (int i = m_FreeSlots.size() - 1; i >= 0; i--)
|
||||
{
|
||||
if (m_FreeSlots[i] == regNo)
|
||||
{
|
||||
m_FreeSlots.erase(m_FreeSlots.begin() + i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t PeekFirstFreeSlot() const
|
||||
{
|
||||
return m_FreeSlots.back();
|
||||
}
|
||||
|
||||
uint32_t SaveTotalShaderStageAllocationsCount()
|
||||
{
|
||||
m_ShaderStageAllocations = m_Allocations.size();
|
||||
return m_ShaderStageAllocations;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<uint32_t> m_FreeSlots;
|
||||
};
|
||||
|
||||
//The shader stages (Vertex, Pixel et al) do not depend on each other
|
||||
//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
|
||||
//that hull shaders must be compiled before domain shaders, and
|
||||
//the pixel shader must be compiled before all of the others.
|
||||
//During compilation the GLSLCrossDependencyData struct will
|
||||
//carry over any information needed about a different shader stage
|
||||
//in order to construct valid GLSL shader combinations.
|
||||
|
||||
|
||||
//Using GLSLCrossDependencyData is optional. However some shader
|
||||
//combinations may show link failures, or runtime errors.
|
||||
class GLSLCrossDependencyData
|
||||
{
|
||||
public:
|
||||
|
||||
// Result of a GLSL binding lookup (see GetGLSLResourceBinding).
// Kept as a plain aggregate so it can be brace-initialised as { slot, known }.
struct GLSLBufferBindPointInfo
{
    uint32_t slot;   // the binding index
    bool     known;  // true when the name already had a binding, false when one was just allocated
};
|
||||
|
||||
// One Vulkan resource binding, i.e. a <set, binding> pair.
// Kept as a plain aggregate so it can be brace-initialised as { set, binding }.
struct VulkanResourceBinding
{
    uint32_t set;      // descriptor set number
    uint32_t binding;  // binding number within that set
};
|
||||
|
||||
// Buffer categories used to pick a GLSL binding counter.
// BufferType_Count is the number of distinct categories; BufferType_Generic is an
// alias of BufferType_ReadWrite.
enum GLSLBufferType
{
    BufferType_ReadWrite = 0,
    BufferType_Constant  = 1,
    BufferType_SSBO      = 2,
    BufferType_Texture   = 3,
    BufferType_UBO       = 4,

    BufferType_Count     = 5,
    BufferType_Generic   = BufferType_ReadWrite
};
|
||||
|
||||
private:
|
||||
//Required if PixelInterpDependency is true
|
||||
std::vector<INTERPOLATION_MODE> pixelInterpolation;
|
||||
|
||||
// Map of varying locations, indexed by varying names.
|
||||
typedef std::map<std::string, uint32_t> VaryingLocations;
|
||||
|
||||
static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output)
|
||||
|
||||
VaryingLocations varyingLocationsMap[MAX_NAMESPACES];
|
||||
uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES];
|
||||
|
||||
typedef std::map<std::string, VulkanResourceBinding> VulkanResourceBindings;
|
||||
VulkanResourceBindings m_VulkanResourceBindings;
|
||||
uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set.
|
||||
|
||||
typedef std::map<std::string, uint32_t> GLSLResouceBindings;
|
||||
|
||||
public:
|
||||
GLSLResouceBindings m_GLSLResourceBindings;
|
||||
uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAV, Constant and Buffers have seperate binding ranges
|
||||
uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings
|
||||
|
||||
// Maps a shader stage and direction (input/output) to an index into the
// per-namespace varying tables. An output namespace of one stage shares its index
// with the input namespace of the stage that consumes it, which for geometry and
// pixel inputs depends on the stages present in ui32ProgramStages.
inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput)
{
    const bool hasDomainStage = (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) != 0;
    const bool hasGeometryStage = (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) != 0;

    switch (eShaderType)
    {
        case VERTEX_SHADER:
            return isInput ? 0 : 1;

        case HULL_SHADER:
            return isInput ? 1 : 2;

        case DOMAIN_SHADER:
            return isInput ? 2 : 3;

        case GEOMETRY_SHADER:
            if (!isInput)
                return 4;
            // The input depends on whether there's a tessellation shader before us
            return hasDomainStage ? 3 : 1;

        case PIXEL_SHADER:
            if (!isInput)
                return 5; // This value never really used
            // The inputs can come from geom shader, domain shader or directly from vertex shader
            if (hasGeometryStage)
                return 4;
            return hasDomainStage ? 3 : 1;

        default:
            return 0;
    }
}
|
||||
|
||||
public:
|
||||
GLSLCrossDependencyData()
|
||||
: eTessPartitioning(),
|
||||
eTessOutPrim(),
|
||||
fMaxTessFactor(64.0),
|
||||
numPatchesInThreadGroup(0),
|
||||
hasControlPoint(false),
|
||||
hasPatchConstant(false),
|
||||
ui32ProgramStages(0),
|
||||
m_ExtBlendModes()
|
||||
{
|
||||
memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
|
||||
memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
|
||||
memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding));
|
||||
}
|
||||
|
||||
// Retrieve the location for a varying with a given name.
|
||||
// If the name doesn't already have an allocated location, allocate one
|
||||
// and store it into the map.
|
||||
inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex)
|
||||
{
|
||||
int nspace = GetVaryingNamespace(eShaderType, isInput);
|
||||
VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
|
||||
if (itr != varyingLocationsMap[nspace].end())
|
||||
return itr->second;
|
||||
|
||||
if (keepLocation)
|
||||
{
|
||||
// Try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11)
|
||||
|
||||
// Inspect last 2 characters in name
|
||||
size_t len = name.length();
|
||||
|
||||
if (len > 1)
|
||||
{
|
||||
if (isdigit(name[len - 1]))
|
||||
{
|
||||
uint32_t index = 0;
|
||||
if (isdigit(name[len - 2]))
|
||||
index = atoi(&name[len - 2]); // 2-digits index
|
||||
else
|
||||
index = atoi(&name[len - 1]); // 1-digit index
|
||||
|
||||
if (index < 32) // Some platforms only allow 32 varying locations
|
||||
{
|
||||
// Check that index is not already used
|
||||
bool canUseIndex = true;
|
||||
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
|
||||
{
|
||||
if (it->second == index)
|
||||
{
|
||||
canUseIndex = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (canUseIndex)
|
||||
{
|
||||
varyingLocationsMap[nspace].insert(std::make_pair(name, index));
|
||||
return index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fallback: pick an unused index (max of already allocated AND of semanticIndices found by SignatureAnalysis
|
||||
uint32_t maxIndexAlreadyAssigned = 0;
|
||||
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
|
||||
maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second);
|
||||
|
||||
uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 1);
|
||||
varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex));
|
||||
return fallbackIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t newKey = nextAvailableVaryingLocation[nspace];
|
||||
nextAvailableVaryingLocation[nspace]++;
|
||||
varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
|
||||
return newKey;
|
||||
}
|
||||
}
|
||||
|
||||
// Retrieve the binding for a resource (texture, constant buffer, image) with a given name
|
||||
// If not found, allocate a new one (in set 0) and return that
|
||||
// The returned value is a pair of <set, binding>
|
||||
// If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name)
|
||||
// will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified
|
||||
// if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'
|
||||
inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
|
||||
{
|
||||
// scan for the special marker
|
||||
const char *marker = "Xhlslcc_set_%d_bind_%dX";
|
||||
uint32_t Set = 0, Binding = 0;
|
||||
size_t startLoc = name.find("Xhlslcc");
|
||||
if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2))
|
||||
{
|
||||
// Get rid of all markers
|
||||
while ((startLoc = name.find("Xhlslcc")) != std::string::npos)
|
||||
{
|
||||
size_t endLoc = name.find('X', startLoc + 1);
|
||||
if (endLoc == std::string::npos)
|
||||
break;
|
||||
name.erase(startLoc, endLoc - startLoc + 1);
|
||||
}
|
||||
// Add to map
|
||||
VulkanResourceBinding newBind = { Set, Binding };
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
|
||||
if (allocRoomForCounter)
|
||||
{
|
||||
VulkanResourceBinding counterBind = { Set, Binding + 1 };
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
|
||||
}
|
||||
|
||||
return newBind;
|
||||
}
|
||||
|
||||
VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name);
|
||||
if (itr != m_VulkanResourceBindings.end())
|
||||
return itr->second;
|
||||
|
||||
// Allocate a new one
|
||||
VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
|
||||
m_NextAvailableVulkanResourceBinding[preferredSet]++;
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
|
||||
if (allocRoomForCounter)
|
||||
{
|
||||
VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
|
||||
m_NextAvailableVulkanResourceBinding[preferredSet]++;
|
||||
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
|
||||
}
|
||||
return newBind;
|
||||
}
|
||||
|
||||
// GLSL Bind point handling logic
|
||||
// Handles both 'old style' fill around fixed UAV and new style partitioned offsets with fixed UAV locations
|
||||
|
||||
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
|
||||
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
|
||||
// In this step make m_structuredBufferBindPoints contain increasingly ordered uints starting from zero.
|
||||
// This is only used when we are doing old style binding setup
|
||||
void SetupGLSLResourceBindingSlotsIndices()
|
||||
{
|
||||
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++)
|
||||
{
|
||||
m_StructuredBufferBindPoints[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
void RemoveBindPointFromAvailableList(uint32_t bindPoint)
|
||||
{
|
||||
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++)
|
||||
{
|
||||
if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove uav binding point from the list by copying array remainder here
|
||||
{
|
||||
memcpy(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type)
|
||||
{
|
||||
m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint));
|
||||
RemoveBindPointFromAvailableList(bindPoint);
|
||||
}
|
||||
|
||||
// True for the buffer types that have their own binding counter (constant, texture
// and UBO buffers); every other type shares the generic counter.
bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType)
{
    switch (bufferType)
    {
        case BufferType_Constant:
        case BufferType_Texture:
        case BufferType_UBO:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
// Returns the binding the next resource of bufferType would get, without consuming it.
// Types with their own counter get that counter's value directly; all others are
// looked up in m_StructuredBufferBindPoints using the generic counter as index.
uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType)
{
    if (ShouldUseBufferSpecificBinding(bufferType))
        return m_NextAvailableGLSLResourceBinding[bufferType];

    const uint32_t genericCursor = m_NextAvailableGLSLResourceBinding[BufferType_Generic];
    return m_StructuredBufferBindPoints[genericCursor];
}
|
||||
|
||||
// Advances the binding counter that serves bufferType: its own counter when it has
// one, the generic counter otherwise.
void UpdateResourceBindingIndex(GLSLBufferType bufferType)
{
    const GLSLBufferType counter = ShouldUseBufferSpecificBinding(bufferType) ? bufferType : BufferType_Generic;
    m_NextAvailableGLSLResourceBinding[counter]++;
}
|
||||
|
||||
// Looks up the GLSL binding for name, allocating the next binding for bufferType
// when the name has none yet.
// The result's 'known' flag is true when the binding already existed and false when
// it was allocated by this call.
inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType)
{
    GLSLBufferBindPointInfo info;

    const GLSLResouceBindings::iterator found = m_GLSLResourceBindings.find(name);
    if (found == m_GLSLResourceBindings.end())
    {
        // First request for this name: take the next binding and remember it.
        info.slot = GetGLSLBufferBindPointIndex(bufferType);
        info.known = false;
        UpdateResourceBindingIndex(bufferType);
        m_GLSLResourceBindings.insert(std::make_pair(name, info.slot));
    }
    else
    {
        info.slot = found->second;
        info.known = true;
    }

    return info;
}
|
||||
|
||||
//dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
|
||||
//but they appear on inputs inside domain shaders for GL.
|
||||
//Hull shader must be compiled before domain so the
|
||||
//ensure correct partitioning and primitive type information
|
||||
//can be saved when compiling hull and passed to domain compilation.
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
float fMaxTessFactor;
|
||||
int numPatchesInThreadGroup;
|
||||
bool hasControlPoint;
|
||||
bool hasPatchConstant;
|
||||
|
||||
// Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
|
||||
// Needed so we can construct proper shader input and output names
|
||||
uint32_t ui32ProgramStages;
|
||||
|
||||
std::vector<std::string> m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list.
|
||||
|
||||
// Returns the interpolation mode recorded for register regNo, or
// INTERPOLATION_UNDEFINED when regNo is beyond what has been recorded.
inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo)
{
    return regNo < pixelInterpolation.size() ? pixelInterpolation[regNo] : INTERPOLATION_UNDEFINED;
}
|
||||
|
||||
inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode)
|
||||
{
|
||||
if (regNo >= pixelInterpolation.size())
|
||||
pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED);
|
||||
|
||||
pixelInterpolation[regNo] = mode;
|
||||
}
|
||||
|
||||
// Predicate for searching a sequence of string pairs: matches the elements whose
// first string equals the value given at construction.
struct CompareFirst
{
private:
    std::string m_Val;  // the value to look for

public:
    CompareFirst(std::string val) : m_Val(val) {}

    // True when elem.first equals the stored value.
    bool operator()(const std::pair<std::string, std::string>& elem) const
    {
        return elem.first == m_Val;
    }
};
|
||||
|
||||
inline bool IsMemberDeclared(const std::string &name)
|
||||
{
|
||||
if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
MemberDefinitions m_SharedFunctionMembers;
|
||||
std::vector<std::string> m_SharedDependencies;
|
||||
BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots;
|
||||
BindingSlotAllocator m_SharedBufferSlots;
|
||||
|
||||
inline void ClearCrossDependencyData()
|
||||
{
|
||||
pixelInterpolation.clear();
|
||||
for (int i = 0; i < MAX_NAMESPACES; i++)
|
||||
{
|
||||
varyingLocationsMap[i].clear();
|
||||
nextAvailableVaryingLocation[i] = 0;
|
||||
}
|
||||
m_SharedFunctionMembers.clear();
|
||||
m_SharedDependencies.clear();
|
||||
}
|
||||
|
||||
bool IsHullShaderInputAlreadyDeclared(const std::string& name)
|
||||
{
|
||||
bool isKnown = false;
|
||||
|
||||
for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx)
|
||||
{
|
||||
if (m_hullShaderInputs[idx] == name)
|
||||
{
|
||||
isKnown = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return isKnown;
|
||||
}
|
||||
|
||||
void RecordHullShaderInput(const std::string& name)
|
||||
{
|
||||
m_hullShaderInputs.push_back(name);
|
||||
}
|
||||
|
||||
std::vector<std::string> m_hullShaderInputs;
|
||||
};
|
||||
|
||||
struct GLSLShader
|
||||
{
|
||||
int shaderType; //One of the GL enums.
|
||||
std::string sourceCode;
|
||||
ShaderInfo reflection;
|
||||
GLLang GLSLLanguage;
|
||||
TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out
|
||||
};
|
||||
|
||||
// Interface for retrieving reflection and diagnostics data
|
||||
class HLSLccReflection
|
||||
{
|
||||
public:
|
||||
HLSLccReflection() {}
|
||||
virtual ~HLSLccReflection() {}
|
||||
|
||||
// Called on errors or diagnostic messages
|
||||
virtual void OnDiagnostics(const std::string &error, int line, bool isError) {}
|
||||
|
||||
virtual void OnInputBinding(const std::string &name, int bindIndex) {}
|
||||
|
||||
// Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused
|
||||
// constant buffers and vars from compute shaders where we need broader context than a single kernel to know
|
||||
// if something can be dropped, as the constant buffers are shared between all kernels in a .compute file.
|
||||
virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; }
|
||||
|
||||
// Returns false if this constant var is not needed for this shader. See above.
|
||||
virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize, bool isUsed) { return true; }
|
||||
|
||||
virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
|
||||
virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
|
||||
virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
|
||||
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
|
||||
virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {}
|
||||
virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {}
|
||||
|
||||
// these are for now metal only (but can be trivially added for other backends if needed)
|
||||
// they are useful mostly for diagnostics as interim values are actually hidden from user
|
||||
virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {}
|
||||
virtual void OnBuiltinOutput(SPECIAL_NAME name) {}
|
||||
virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {}
|
||||
|
||||
|
||||
enum AccessType
|
||||
{
|
||||
ReadAccess = 1 << 0,
|
||||
WriteAccess = 1 << 1
|
||||
};
|
||||
|
||||
virtual void OnStorageImage(int bindIndex, unsigned int access) {}
|
||||
};
|
||||
|
||||
|
||||
/* Translation flags. Each flag occupies a single bit, written below as (1u << bit). */

/* HLSL constant buffers are treated as default-block uniform arrays by default. This is done
   to support versions of GLSL which lack ARB_uniform_buffer_object functionality.
   Setting this flag causes each one to have its own uniform block.
   Note: Currently the nth const buffer will be named UnformBufferN. This is likely to change to the original HLSL name in the future. */
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 1u << 0;

static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 1u << 1;

static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 1u << 2;

static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 1u << 3;

// GS enabled?
// Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS).
// This flag is needed in order for the interfaces between stages to match when GS is in use.
// PS inputs VtxGeoOutput
// GS outputs VtxGeoOutput
// VS outputs VtxOutput if GS enabled. VtxGeoOutput otherwise.
static const unsigned int HLSLCC_FLAG_GS_ENABLED = 1u << 4;

static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 1u << 5;

// Either use this flag or glBindFragDataLocationIndexed.
// When set the first pixel shader output is the first input to blend
// equation, the others go to the second input.
static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 1u << 6;

// If set, shader inputs and outputs are declared with their semantic name.
static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 1u << 7;
// If set, shader inputs and outputs are declared with their semantic name appended.
static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 1u << 8;

// If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername".
static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 1u << 9;

// If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that)
static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 1u << 10;

// If set, global uniforms are not stored in a struct.
static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 1u << 11;

// If set, image declarations will always have binding and format qualifiers.
static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 1u << 12;

// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers
// Also removes that suffix from generated output
static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 1u << 13;

// If set, adds location qualifiers to intra-shader varyings.
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 1u << 14; // NOTE: obsolete flag (behavior enabled by this flag became the default in 83a16a1829cf)

// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS
// so that if that macro is undefined, all UBO declarations will become normal uniforms
static const unsigned int HLSLCC_FLAG_WRAP_UBO = 1u << 15;

// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 1u << 16;

#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d"

// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtx<rows>x<cols>'
static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 1u << 17;

// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "<Name>_hlslcc_set_X_bind_Y"
// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData)
static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 1u << 18;

// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 1u << 19;

// If set, avoid emitting atomic counters (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead.
static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 1u << 20;

// Unused: 1u << 21 (0x200000)

// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch
static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 1u << 22;

// Build for Switch.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 1u << 23;

// If set, generate an instance name for constant buffers. GLSL specs 4.5 disallows uniform variables from different constant buffers sharing the same name
// as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue.
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 1u << 24;

// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader
static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 1u << 25;

// Disable fastmath
static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 1u << 26;

// If set, uniform explicit location qualifiers are enabled (even if the language version doesn't support that)
static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 1u << 27;

// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to
static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 1u << 28;

// If set, try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11)
static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 1u << 29;

// Code generation might vary for mobile targets, or using lower sampler precision than full by default
static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 1u << 30;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
|
||||
unsigned int flags,
|
||||
GLLang language,
|
||||
const GlExtensions *extensions,
|
||||
GLSLCrossDependencyData* dependencies,
|
||||
HLSLccSamplerPrecisionInfo& samplerPrecisions,
|
||||
HLSLccReflection& reflectionCallbacks,
|
||||
GLSLShader* result
|
||||
);
|
||||
|
||||
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
|
||||
unsigned int flags,
|
||||
GLLang language,
|
||||
const GlExtensions *extensions,
|
||||
GLSLCrossDependencyData* dependencies,
|
||||
HLSLccSamplerPrecisionInfo& samplerPrecisions,
|
||||
HLSLccReflection& reflectionCallbacks,
|
||||
GLSLShader* result);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
3
third_party/HLSLcc/include/hlslcc.hpp
vendored
Normal file
3
third_party/HLSLcc/include/hlslcc.hpp
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
// C++ entry header for HLSLcc.
// hlslcc.h is included directly rather than inside an extern "C" block: it is a C++
// header that uses standard library templates (which cannot be given C language
// linkage), and it already wraps its exported functions in extern "C" itself.
#include "hlslcc.h"
|
799
third_party/HLSLcc/include/pstdint.h
vendored
Normal file
799
third_party/HLSLcc/include/pstdint.h
vendored
Normal file
@ -0,0 +1,799 @@
|
||||
/* A portable stdint.h
|
||||
****************************************************************************
|
||||
* BSD License:
|
||||
****************************************************************************
|
||||
*
|
||||
* Copyright (c) 2005-2011 Paul Hsieh
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
****************************************************************************
|
||||
*
|
||||
* Version 0.1.12
|
||||
*
|
||||
* The ANSI C standard committee, for the C99 standard, specified the
|
||||
* inclusion of a new standard include file called stdint.h. This is
|
||||
* a very useful and long desired include file which contains several
|
||||
* very precise definitions for integer scalar types that is
|
||||
* critically important for making portable several classes of
|
||||
* applications including cryptography, hashing, variable length
|
||||
* integer libraries and so on. But for most developers it's likely
|
||||
* useful just for programming sanity.
|
||||
*
|
||||
* The problem is that most compiler vendors have decided not to
|
||||
* implement the C99 standard, and the next C++ language standard
|
||||
* (which has a lot more mindshare these days) will be a long time in
|
||||
* coming and it's unknown whether or not it will include stdint.h or
|
||||
* how much adoption it will have. Either way, it will be a long time
|
||||
* before all compilers come with a stdint.h and it also does nothing
|
||||
* for the extremely large number of compilers available today which
|
||||
* do not include this file, or anything comparable to it.
|
||||
*
|
||||
* So that's what this file is all about. It's an attempt to build a
|
||||
* single universal include file that works on as many platforms as
|
||||
* possible to deliver what stdint.h is supposed to. A few things
|
||||
* that should be noted about this file:
|
||||
*
|
||||
* 1) It is not guaranteed to be portable and/or present an identical
|
||||
* interface on all platforms. The extreme variability of the
|
||||
* ANSI C standard makes this an impossibility right from the
|
||||
* very get go. It's really only meant to be useful for the vast
|
||||
* majority of platforms that possess the capability of
|
||||
* implementing usefully and precisely defined, standard sized
|
||||
* integer scalars. Systems which are not intrinsically 2s
|
||||
* complement may produce invalid constants.
|
||||
*
|
||||
* 2) There is an unavoidable use of non-reserved symbols.
|
||||
*
|
||||
* 3) Other standard include files are invoked.
|
||||
*
|
||||
* 4) This file may come in conflict with future platforms that do
|
||||
* include stdint.h. The hope is that one or the other can be
|
||||
* used with no real difference.
|
||||
*
|
||||
* 5) In the current version, if your platform can't represent
|
||||
* int32_t, int16_t and int8_t, it just dumps out with a compiler
|
||||
* error.
|
||||
*
|
||||
* 6) 64 bit integers may or may not be defined. Test for their
|
||||
* presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
|
||||
* Note that this is different from the C99 specification which
|
||||
* requires the existence of 64 bit support in the compiler. If
|
||||
* this is not defined for your platform, yet it is capable of
|
||||
* dealing with 64 bits then it is because this file has not yet
|
||||
* been extended to cover all of your system's capabilities.
|
||||
*
|
||||
* 7) (u)intptr_t may or may not be defined. Test for its presence
|
||||
* with the test: #ifdef PTRDIFF_MAX. If this is not defined
|
||||
* for your platform, then it is because this file has not yet
|
||||
* been extended to cover all of your system's capabilities, not
|
||||
* because it's optional.
|
||||
*
|
||||
* 8) The following might not be defined even if your platform is
|
||||
* capable of defining it:
|
||||
*
|
||||
* WCHAR_MIN
|
||||
* WCHAR_MAX
|
||||
* (u)int64_t
|
||||
* PTRDIFF_MIN
|
||||
* PTRDIFF_MAX
|
||||
* (u)intptr_t
|
||||
*
|
||||
* 9) The following have not been defined:
|
||||
*
|
||||
* WINT_MIN
|
||||
* WINT_MAX
|
||||
*
|
||||
* 10) The criteria for defining (u)int_least(*)_t isn't clear,
|
||||
* except for systems which don't have a type that precisely
|
||||
* defined 8, 16, or 32 bit types (which this include file does
|
||||
* not support anyways). Default definitions have been given.
|
||||
*
|
||||
* 11) The criteria for defining (u)int_fast(*)_t isn't something I
|
||||
* would trust to any particular compiler vendor or the ANSI C
|
||||
* committee. It is well known that "compatible systems" are
|
||||
* commonly created that have very different performance
|
||||
* characteristics from the systems they are compatible with,
|
||||
* especially those whose vendors make both the compiler and the
|
||||
* system. Default definitions have been given, but it's strongly
|
||||
* recommended that users never use these definitions for any
|
||||
* reason (they do *NOT* deliver any serious guarantee of
|
||||
* improved performance -- not in this file, nor any vendor's
|
||||
* stdint.h).
|
||||
*
|
||||
* 12) The following macros:
|
||||
*
|
||||
* PRINTF_INTMAX_MODIFIER
|
||||
* PRINTF_INT64_MODIFIER
|
||||
* PRINTF_INT32_MODIFIER
|
||||
* PRINTF_INT16_MODIFIER
|
||||
* PRINTF_LEAST64_MODIFIER
|
||||
* PRINTF_LEAST32_MODIFIER
|
||||
* PRINTF_LEAST16_MODIFIER
|
||||
* PRINTF_INTPTR_MODIFIER
|
||||
*
|
||||
* are strings which have been defined as the modifiers required
|
||||
* for the "d", "u" and "x" printf formats to correctly output
|
||||
* (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
|
||||
* (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
|
||||
* PRINTF_INTPTR_MODIFIER is not defined for some systems which
|
||||
* provide their own stdint.h. PRINTF_INT64_MODIFIER is not
|
||||
* defined if INT64_MAX is not defined. These are an extension
|
||||
* beyond what C99 specifies must be in stdint.h.
|
||||
*
|
||||
* In addition, the following macros are defined:
|
||||
*
|
||||
* PRINTF_INTMAX_HEX_WIDTH
|
||||
* PRINTF_INT64_HEX_WIDTH
|
||||
* PRINTF_INT32_HEX_WIDTH
|
||||
* PRINTF_INT16_HEX_WIDTH
|
||||
* PRINTF_INT8_HEX_WIDTH
|
||||
* PRINTF_INTMAX_DEC_WIDTH
|
||||
* PRINTF_INT64_DEC_WIDTH
|
||||
* PRINTF_INT32_DEC_WIDTH
|
||||
* PRINTF_INT16_DEC_WIDTH
|
||||
* PRINTF_INT8_DEC_WIDTH
|
||||
*
|
||||
* Which specifies the maximum number of characters required to
|
||||
* print the number of that type in either hexadecimal or decimal.
|
||||
* These are an extension beyond what C99 specifies must be in
|
||||
* stdint.h.
|
||||
*
|
||||
* Compilers tested (all with 0 warnings at their highest respective
|
||||
* settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
|
||||
* bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
|
||||
* .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
|
||||
*
|
||||
* This file should be considered a work in progress. Suggestions for
|
||||
* improvements, especially those which increase coverage are strongly
|
||||
* encouraged.
|
||||
*
|
||||
* Acknowledgements
|
||||
*
|
||||
* The following people have made significant contributions to the
|
||||
* development and testing of this file:
|
||||
*
|
||||
* Chris Howie
|
||||
* John Steele Scott
|
||||
* Dave Thorup
|
||||
* John Dill
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include <signal.h>
|
||||
|
||||
/*
|
||||
* For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
|
||||
* do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
|
||||
*/
|
||||
|
||||
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)))) && !defined(_PSTDINT_H_INCLUDED)
|
||||
#include <stdint.h>
|
||||
#define _PSTDINT_H_INCLUDED
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER "l"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INT64_HEX_WIDTH
|
||||
# define PRINTF_INT64_HEX_WIDTH "16"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_HEX_WIDTH
|
||||
# define PRINTF_INT32_HEX_WIDTH "8"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_HEX_WIDTH
|
||||
# define PRINTF_INT16_HEX_WIDTH "4"
|
||||
# endif
|
||||
# ifndef PRINTF_INT8_HEX_WIDTH
|
||||
# define PRINTF_INT8_HEX_WIDTH "2"
|
||||
# endif
|
||||
# ifndef PRINTF_INT64_DEC_WIDTH
|
||||
# define PRINTF_INT64_DEC_WIDTH "20"
|
||||
# endif
|
||||
# ifndef PRINTF_INT32_DEC_WIDTH
|
||||
# define PRINTF_INT32_DEC_WIDTH "10"
|
||||
# endif
|
||||
# ifndef PRINTF_INT16_DEC_WIDTH
|
||||
# define PRINTF_INT16_DEC_WIDTH "5"
|
||||
# endif
|
||||
# ifndef PRINTF_INT8_DEC_WIDTH
|
||||
# define PRINTF_INT8_DEC_WIDTH "3"
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
|
||||
# endif
|
||||
|
||||
/*
|
||||
* Something really weird is going on with Open Watcom. Just pull some of
|
||||
* these duplicated definitions from Open Watcom's stdint.h file for now.
|
||||
*/
|
||||
|
||||
# if defined(__WATCOMC__) && __WATCOMC__ >= 1250
|
||||
# if !defined(INT64_C)
|
||||
# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
|
||||
# endif
|
||||
# if !defined(UINT64_C)
|
||||
# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
|
||||
# endif
|
||||
# if !defined(INT32_C)
|
||||
# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
|
||||
# endif
|
||||
# if !defined(UINT32_C)
|
||||
# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
|
||||
# endif
|
||||
# if !defined(INT16_C)
|
||||
# define INT16_C(x) (x)
|
||||
# endif
|
||||
# if !defined(UINT16_C)
|
||||
# define UINT16_C(x) (x)
|
||||
# endif
|
||||
# if !defined(INT8_C)
|
||||
# define INT8_C(x) (x)
|
||||
# endif
|
||||
# if !defined(UINT8_C)
|
||||
# define UINT8_C(x) (x)
|
||||
# endif
|
||||
# if !defined(UINT64_MAX)
|
||||
# define UINT64_MAX 18446744073709551615ULL
|
||||
# endif
|
||||
# if !defined(INT64_MAX)
|
||||
# define INT64_MAX 9223372036854775807LL
|
||||
# endif
|
||||
# if !defined(UINT32_MAX)
|
||||
# define UINT32_MAX 4294967295UL
|
||||
# endif
|
||||
# if !defined(INT32_MAX)
|
||||
# define INT32_MAX 2147483647L
|
||||
# endif
|
||||
# if !defined(INTMAX_MAX)
|
||||
# define INTMAX_MAX INT64_MAX
|
||||
# endif
|
||||
# if !defined(INTMAX_MIN)
|
||||
# define INTMAX_MIN INT64_MIN
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef _PSTDINT_H_INCLUDED
|
||||
#define _PSTDINT_H_INCLUDED
|
||||
|
||||
#ifndef SIZE_MAX
|
||||
# define SIZE_MAX (~(size_t)0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Deduce the type assignments from limits.h under the assumption that
|
||||
* integer sizes in bits are powers of 2, and follow the ANSI
|
||||
* definitions.
|
||||
*/
|
||||
|
||||
#ifndef UINT8_MAX
|
||||
# define UINT8_MAX 0xff
|
||||
#endif
|
||||
#ifndef uint8_t
|
||||
# if (UCHAR_MAX == UINT8_MAX) || defined(S_SPLINT_S)
|
||||
typedef unsigned char uint8_t;
|
||||
# define UINT8_C(v) ((uint8_t) v)
|
||||
# else
|
||||
# error "Platform not supported"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef INT8_MAX
|
||||
# define INT8_MAX 0x7f
|
||||
#endif
|
||||
#ifndef INT8_MIN
|
||||
# define INT8_MIN INT8_C(0x80)
|
||||
#endif
|
||||
#ifndef int8_t
|
||||
# if (SCHAR_MAX == INT8_MAX) || defined(S_SPLINT_S)
|
||||
typedef signed char int8_t;
|
||||
# define INT8_C(v) ((int8_t) v)
|
||||
# else
|
||||
# error "Platform not supported"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef UINT16_MAX
|
||||
# define UINT16_MAX 0xffff
|
||||
#endif
|
||||
#ifndef uint16_t
|
||||
#if (UINT_MAX == UINT16_MAX) || defined(S_SPLINT_S)
|
||||
typedef unsigned int uint16_t;
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER ""
|
||||
# endif
|
||||
# define UINT16_C(v) ((uint16_t) (v))
|
||||
#elif (USHRT_MAX == UINT16_MAX)
|
||||
typedef unsigned short uint16_t;
|
||||
# define UINT16_C(v) ((uint16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef INT16_MAX
|
||||
# define INT16_MAX 0x7fff
|
||||
#endif
|
||||
#ifndef INT16_MIN
|
||||
# define INT16_MIN INT16_C(0x8000)
|
||||
#endif
|
||||
#ifndef int16_t
|
||||
#if (INT_MAX == INT16_MAX) || defined(S_SPLINT_S)
|
||||
typedef signed int int16_t;
|
||||
# define INT16_C(v) ((int16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER ""
|
||||
# endif
|
||||
#elif (SHRT_MAX == INT16_MAX)
|
||||
typedef signed short int16_t;
|
||||
# define INT16_C(v) ((int16_t) (v))
|
||||
# ifndef PRINTF_INT16_MODIFIER
|
||||
# define PRINTF_INT16_MODIFIER "h"
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef UINT32_MAX
|
||||
# define UINT32_MAX (0xffffffffUL)
|
||||
#endif
|
||||
#ifndef uint32_t
|
||||
#if (ULONG_MAX == UINT32_MAX) || defined(S_SPLINT_S)
|
||||
typedef unsigned long uint32_t;
|
||||
# define UINT32_C(v) v ## UL
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER "l"
|
||||
# endif
|
||||
#elif (UINT_MAX == UINT32_MAX)
|
||||
typedef unsigned int uint32_t;
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
# define UINT32_C(v) v ## U
|
||||
#elif (USHRT_MAX == UINT32_MAX)
|
||||
typedef unsigned short uint32_t;
|
||||
# define UINT32_C(v) ((unsigned short) (v))
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef INT32_MAX
|
||||
# define INT32_MAX (0x7fffffffL)
|
||||
#endif
|
||||
/*
 * INT32_MIN is written as -(INT32_MAX) - 1. The hex literal 0x80000000 does
 * not fit in a signed 32-bit type, so it would be given an unsigned (or a
 * wider, positive) type and the resulting "minimum" would not be negative.
 */
#ifndef INT32_MIN
# define INT32_MIN (-INT32_C(2147483647) - 1)
#endif
|
||||
#ifndef int32_t
|
||||
#if (LONG_MAX == INT32_MAX) || defined(S_SPLINT_S)
|
||||
typedef signed long int32_t;
|
||||
# define INT32_C(v) v ## L
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER "l"
|
||||
# endif
|
||||
#elif (INT_MAX == INT32_MAX)
|
||||
typedef signed int int32_t;
|
||||
# define INT32_C(v) v
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
#elif (SHRT_MAX == INT32_MAX)
|
||||
typedef signed short int32_t;
|
||||
# define INT32_C(v) ((short) (v))
|
||||
# ifndef PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_INT32_MODIFIER ""
|
||||
# endif
|
||||
#else
|
||||
#error "Platform not supported"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The macro stdint_int64_defined is temporarily used to record
|
||||
* whether or not 64 integer support is available. It must be
|
||||
* defined for any 64 integer extensions for new platforms that are
|
||||
* added.
|
||||
*/
|
||||
|
||||
#undef stdint_int64_defined
|
||||
#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined(S_SPLINT_S)
|
||||
# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined(S_SPLINT_S)
|
||||
# define stdint_int64_defined
|
||||
typedef long long int64_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined(stdint_int64_defined)
|
||||
# if defined(__GNUC__)
|
||||
# define stdint_int64_defined
|
||||
__extension__ typedef long long int64_t;
|
||||
__extension__ typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) || defined(S_SPLINT_S)
|
||||
# define stdint_int64_defined
|
||||
typedef long long int64_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
# define UINT64_C(v) v ## ULL
|
||||
# define INT64_C(v) v ## LL
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "ll"
|
||||
# endif
|
||||
# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
|
||||
# define stdint_int64_defined
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
# define UINT64_C(v) v ## UI64
|
||||
# define INT64_C(v) v ## I64
|
||||
# ifndef PRINTF_INT64_MODIFIER
|
||||
# define PRINTF_INT64_MODIFIER "I64"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if !defined(LONG_LONG_MAX) && defined(INT64_C)
|
||||
# define LONG_LONG_MAX INT64_C (9223372036854775807)
|
||||
#endif
|
||||
#ifndef ULONG_LONG_MAX
|
||||
# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
|
||||
#endif
|
||||
|
||||
#if !defined(INT64_MAX) && defined(INT64_C)
|
||||
# define INT64_MAX INT64_C (9223372036854775807)
|
||||
#endif
|
||||
/*
 * INT64_MIN is written as -(INT64_MAX) - 1. The digits 9223372036854775808
 * are one past the largest signed 64-bit value, so pasting a signed suffix
 * onto them (as INT64_C does) yields an out-of-range literal rather than
 * the intended negative constant.
 */
#if !defined(INT64_MIN) && defined(INT64_C)
# define INT64_MIN (-INT64_C (9223372036854775807) - 1)
#endif
|
||||
#if !defined(UINT64_MAX) && defined(INT64_C)
|
||||
# define UINT64_MAX UINT64_C (18446744073709551615)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Width of hexadecimal for number field.
|
||||
*/
|
||||
|
||||
#ifndef PRINTF_INT64_HEX_WIDTH
|
||||
# define PRINTF_INT64_HEX_WIDTH "16"
|
||||
#endif
|
||||
#ifndef PRINTF_INT32_HEX_WIDTH
|
||||
# define PRINTF_INT32_HEX_WIDTH "8"
|
||||
#endif
|
||||
#ifndef PRINTF_INT16_HEX_WIDTH
|
||||
# define PRINTF_INT16_HEX_WIDTH "4"
|
||||
#endif
|
||||
#ifndef PRINTF_INT8_HEX_WIDTH
|
||||
# define PRINTF_INT8_HEX_WIDTH "2"
|
||||
#endif
|
||||
|
||||
#ifndef PRINTF_INT64_DEC_WIDTH
|
||||
# define PRINTF_INT64_DEC_WIDTH "20"
|
||||
#endif
|
||||
#ifndef PRINTF_INT32_DEC_WIDTH
|
||||
# define PRINTF_INT32_DEC_WIDTH "10"
|
||||
#endif
|
||||
#ifndef PRINTF_INT16_DEC_WIDTH
|
||||
# define PRINTF_INT16_DEC_WIDTH "5"
|
||||
#endif
|
||||
#ifndef PRINTF_INT8_DEC_WIDTH
|
||||
# define PRINTF_INT8_DEC_WIDTH "3"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Ok, let's not worry about 128 bit integers for now. Moore's law says
|
||||
* we don't need to worry about that until about 2040 at which point
|
||||
* we'll have bigger things to worry about.
|
||||
*/
|
||||
|
||||
#ifdef stdint_int64_defined
|
||||
typedef int64_t intmax_t;
|
||||
typedef uint64_t uintmax_t;
|
||||
# define INTMAX_MAX INT64_MAX
|
||||
# define INTMAX_MIN INT64_MIN
|
||||
# define UINTMAX_MAX UINT64_MAX
|
||||
# define UINTMAX_C(v) UINT64_C(v)
|
||||
# define INTMAX_C(v) INT64_C(v)
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
|
||||
# endif
|
||||
#else
|
||||
typedef int32_t intmax_t;
|
||||
typedef uint32_t uintmax_t;
|
||||
# define INTMAX_MAX INT32_MAX
|
||||
# define UINTMAX_MAX UINT32_MAX
|
||||
# define UINTMAX_C(v) UINT32_C(v)
|
||||
# define INTMAX_C(v) INT32_C(v)
|
||||
# ifndef PRINTF_INTMAX_MODIFIER
|
||||
# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_HEX_WIDTH
|
||||
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
|
||||
# endif
|
||||
# ifndef PRINTF_INTMAX_DEC_WIDTH
|
||||
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Because this file currently only supports platforms which have
|
||||
* precise powers of 2 as bit sizes for the default integers, the
|
||||
* least definitions are all trivial. It's possible that a future
|
||||
* version of this file could have different definitions.
|
||||
*/
|
||||
|
||||
#ifndef stdint_least_defined
|
||||
typedef int8_t int_least8_t;
|
||||
typedef uint8_t uint_least8_t;
|
||||
typedef int16_t int_least16_t;
|
||||
typedef uint16_t uint_least16_t;
|
||||
typedef int32_t int_least32_t;
|
||||
typedef uint32_t uint_least32_t;
|
||||
# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
|
||||
# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
|
||||
# define UINT_LEAST8_MAX UINT8_MAX
|
||||
# define INT_LEAST8_MAX INT8_MAX
|
||||
# define UINT_LEAST16_MAX UINT16_MAX
|
||||
# define INT_LEAST16_MAX INT16_MAX
|
||||
# define UINT_LEAST32_MAX UINT32_MAX
|
||||
# define INT_LEAST32_MAX INT32_MAX
|
||||
# define INT_LEAST8_MIN INT8_MIN
|
||||
# define INT_LEAST16_MIN INT16_MIN
|
||||
# define INT_LEAST32_MIN INT32_MIN
|
||||
# ifdef stdint_int64_defined
|
||||
typedef int64_t int_least64_t;
|
||||
typedef uint64_t uint_least64_t;
|
||||
# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
|
||||
# define UINT_LEAST64_MAX UINT64_MAX
|
||||
# define INT_LEAST64_MAX INT64_MAX
|
||||
# define INT_LEAST64_MIN INT64_MIN
|
||||
# endif
|
||||
#endif
|
||||
#undef stdint_least_defined
|
||||
|
||||
/*
|
||||
* The ANSI C committee pretending to know or specify anything about
|
||||
* performance is the epitome of misguided arrogance. The mandate of
|
||||
* this file is to *ONLY* ever support that absolute minimum
|
||||
* definition of the fast integer types, for compatibility purposes.
|
||||
* No extensions, and no attempt to suggest what may or may not be a
|
||||
* faster integer type will ever be made in this file. Developers are
|
||||
* warned to stay away from these types when using this or any other
|
||||
* stdint.h.
|
||||
*/
|
||||
|
||||
typedef int_least8_t int_fast8_t;
|
||||
typedef uint_least8_t uint_fast8_t;
|
||||
typedef int_least16_t int_fast16_t;
|
||||
typedef uint_least16_t uint_fast16_t;
|
||||
typedef int_least32_t int_fast32_t;
|
||||
typedef uint_least32_t uint_fast32_t;
|
||||
#define UINT_FAST8_MAX UINT_LEAST8_MAX
|
||||
#define INT_FAST8_MAX INT_LEAST8_MAX
|
||||
#define UINT_FAST16_MAX UINT_LEAST16_MAX
|
||||
#define INT_FAST16_MAX INT_LEAST16_MAX
|
||||
#define UINT_FAST32_MAX UINT_LEAST32_MAX
|
||||
#define INT_FAST32_MAX INT_LEAST32_MAX
|
||||
#define INT_FAST8_MIN INT_LEAST8_MIN
|
||||
#define INT_FAST16_MIN INT_LEAST16_MIN
|
||||
#define INT_FAST32_MIN INT_LEAST32_MIN
|
||||
#ifdef stdint_int64_defined
|
||||
typedef int_least64_t int_fast64_t;
|
||||
typedef uint_least64_t uint_fast64_t;
|
||||
# define UINT_FAST64_MAX UINT_LEAST64_MAX
|
||||
# define INT_FAST64_MAX INT_LEAST64_MAX
|
||||
# define INT_FAST64_MIN INT_LEAST64_MIN
|
||||
#endif
|
||||
|
||||
#undef stdint_int64_defined
|
||||
|
||||
/*
|
||||
* Whatever piecemeal, per compiler thing we can do about the wchar_t
|
||||
* type limits.
|
||||
*/
|
||||
|
||||
#if defined(__WATCOMC__) || defined(_MSC_VER) || defined(__GNUC__)
|
||||
# include <wchar.h>
|
||||
# ifndef WCHAR_MIN
|
||||
# define WCHAR_MIN 0
|
||||
# endif
|
||||
# ifndef WCHAR_MAX
|
||||
# define WCHAR_MAX ((wchar_t)-1)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Whatever piecemeal, per compiler/platform thing we can do about the
|
||||
* (u)intptr_t types and limits.
|
||||
*/
|
||||
|
||||
#if defined(_MSC_VER) && defined(_UINTPTR_T_DEFINED)
|
||||
# define STDINT_H_UINTPTR_T_DEFINED
|
||||
#endif
|
||||
|
||||
#ifndef STDINT_H_UINTPTR_T_DEFINED
|
||||
# if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) || defined(_WIN64)
|
||||
# define stdint_intptr_bits 64
|
||||
# elif defined(__WATCOMC__) || defined(__TURBOC__)
|
||||
# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
|
||||
# define stdint_intptr_bits 16
|
||||
# else
|
||||
# define stdint_intptr_bits 32
|
||||
# endif
|
||||
# elif defined(__i386__) || defined(_WIN32) || defined(WIN32)
|
||||
# define stdint_intptr_bits 32
|
||||
# elif defined(__INTEL_COMPILER)
|
||||
#error Unknown compiler
|
||||
# endif
|
||||
|
||||
# ifdef stdint_intptr_bits
|
||||
# define stdint_intptr_glue3_i(a, b, c) a##b##c
|
||||
# define stdint_intptr_glue3(a, b, c) stdint_intptr_glue3_i(a,b,c)
|
||||
# ifndef PRINTF_INTPTR_MODIFIER
|
||||
# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
|
||||
# endif
|
||||
# ifndef PTRDIFF_MAX
|
||||
# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef PTRDIFF_MIN
|
||||
# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
|
||||
# endif
|
||||
# ifndef UINTPTR_MAX
|
||||
# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef INTPTR_MAX
|
||||
# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
|
||||
# endif
|
||||
# ifndef INTPTR_MIN
|
||||
# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
|
||||
# endif
|
||||
# ifndef INTPTR_C
|
||||
# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
|
||||
# endif
|
||||
# ifndef UINTPTR_C
|
||||
# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
|
||||
# endif
|
||||
typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t;
|
||||
typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t;
|
||||
# else
|
||||
#error Unknown compiler
|
||||
# endif
|
||||
# define STDINT_H_UINTPTR_T_DEFINED
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Assumes sig_atomic_t is signed and we have a 2s complement machine.
|
||||
*/
|
||||
|
||||
#ifndef SIG_ATOMIC_MAX
|
||||
# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__TEST_PSTDINT_FOR_CORRECTNESS)
|
||||
|
||||
/*
|
||||
* Please compile with the maximum warning settings to make sure macros are not
|
||||
* defined more than once.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Token-pasting helpers for the self-test. glue3 macro-expands its arguments
 * first and then hands them to glue3_aux, which joins them into one token.
 */
#define glue3_aux(x, y, z) x ## y ## z
#define glue3(x, y, z) glue3_aux(x,y,z)

/*
 * Declare a zero-initialised variable named u<bits> / i<bits> of the matching
 * exact-width type. Only identifiers are pasted; operators such as '=', '~'
 * and '!=' stay outside glue3, because pasting an identifier with an operator
 * does not form a valid preprocessing token and is rejected by GCC and Clang.
 */
#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0);
#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0);

/* Dispatch to DECLU or DECLI according to us (U or I). */
#define DECL(us, bits) glue3(DECL,us,) (bits)

/*
 * Invert every bit of u<bits> and report if the result differs from
 * UINT<bits>_MAX.
 */
#define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits)
|
||||
|
||||
/*
 * Self-test driver, compiled only when __TEST_PSTDINT_FOR_CORRECTNESS is
 * defined. It declares one zero-valued variable per exact-width type, prints
 * each with its PRINTF_*_MODIFIER and compares the text against the same
 * value printed as a plain int, then checks the UINT*_MAX constants.
 * Every mismatch is reported on stdout; the exit status is always success.
 */
int main()
{
    DECL(I, 8)
    DECL(U, 8)
    DECL(I, 16)
    DECL(U, 16)
    DECL(I, 32)
    DECL(U, 32)
#ifdef INT64_MAX
    DECL(I, 64)
    DECL(U, 64)
#endif
    intmax_t imax = INTMAX_C(0);
    uintmax_t umax = UINTMAX_C(0);
    char expected[256];
    char actual[256];

    /* Reference text: a zero followed by the hex form of ~0. */
    sprintf(expected, "%d %x\n", 0, ~0);

    sprintf(actual, "%d %x\n", i8, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with i8 : %s\n", actual);
    }

    sprintf(actual, "%u %x\n", u8, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with u8 : %s\n", actual);
    }

    sprintf(actual, "%d %x\n", i16, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with i16 : %s\n", actual);
    }

    sprintf(actual, "%u %x\n", u16, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with u16 : %s\n", actual);
    }

    sprintf(actual, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with i32 : %s\n", actual);
    }

    sprintf(actual, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with u32 : %s\n", actual);
    }

#ifdef INT64_MAX
    sprintf(actual, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with i64 : %s\n", actual);
    }
#endif

    sprintf(actual, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with imax : %s\n", actual);
    }

    sprintf(actual, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
    if (strcmp(expected, actual) != 0)
    {
        printf("Something wrong with umax : %s\n", actual);
    }

    /* Check that inverting an all-zero value gives the advertised maximum. */
    TESTUMAX(8);
    TESTUMAX(16);
    TESTUMAX(32);
#ifdef INT64_MAX
    TESTUMAX(64);
#endif

    return EXIT_SUCCESS;
}
|
||||
|
||||
#endif
|
53
third_party/HLSLcc/license.txt
vendored
Normal file
53
third_party/HLSLcc/license.txt
vendored
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
Original HLSLcc source code Copyright (c) 2012 James Jones
|
||||
Further improvements Copyright (c) 2014-2016 Unity Technologies
|
||||
All Rights Reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
||||
This software makes use of the bstring library which is provided under the following license:
|
||||
|
||||
Copyright (c) 2002-2008 Paul Hsieh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of bstrlib nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
815
third_party/HLSLcc/src/ControlFlowGraph.cpp
vendored
Normal file
815
third_party/HLSLcc/src/ControlFlowGraph.cpp
vendored
Normal file
@ -0,0 +1,815 @@
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "internal_includes/ControlFlowGraphUtils.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace HLSLcc::ControlFlow;
|
||||
using HLSLcc::ForEachOperand;
|
||||
|
||||
// Rebuilds the whole graph for the instruction range [firstInstruction, endInstruction).
// Any previously built blocks are discarded. Returns the root block, i.e. the block that
// starts at the first non-label instruction of the range.
const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction)
{
    m_BlockStorage.clear();
    m_BlockMap.clear();

    // The block constructor registers the new block (and, recursively, all of its successors)
    // into m_BlockStorage, so the graph owns it and no explicit delete is needed here.
    BasicBlock *rootBlock = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction);

    // Propagate reachable definitions until a full pass over all blocks changes nothing.
    bool changedThisPass = true;
    while (changedThisPass)
    {
        changedThisPass = false;
        for (const auto &block : m_BlockStorage)
        {
            // Every block must be visited on every pass, so do not short-circuit here.
            if (block->RebuildReachable())
                changedThisPass = true;
        }
    }

    return *rootBlock;
}
|
||||
|
||||
// Looks up the block whose head is the first non-label instruction at or after 'instruction'.
// Returns NULL when no block starts there.
const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    const auto found = m_BlockMap.find(blockHead);
    if (found != m_BlockMap.end())
        return found->second;

    return NULL;
}
|
||||
|
||||
// Mutable overload: same lookup as the const version (labels at 'instruction' are skipped
// before searching the block map). Returns NULL when no block starts there.
BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
{
    const Instruction *blockHead = Utils::GetNextNonLabelInstruction(instruction);
    BasicBlockMap::iterator pos = m_BlockMap.find(blockHead);
    const bool haveBlock = (pos != m_BlockMap.end());
    return haveBlock ? pos->second : NULL;
}
|
||||
|
||||
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
|
||||
// Auto-registers itself into ControlFlowGraph
|
||||
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction)
|
||||
: m_Graph(graph)
|
||||
, m_First(psFirst)
|
||||
, m_Last(NULL)
|
||||
, m_End(endInstruction)
|
||||
{
|
||||
m_UEVar.clear();
|
||||
m_VarKill.clear();
|
||||
m_Preceding.clear();
|
||||
m_Succeeding.clear();
|
||||
m_DEDef.clear();
|
||||
m_Reachable.clear();
|
||||
|
||||
// Check that we've pruned the labels
|
||||
ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst));
|
||||
|
||||
// Insert to block storage, block map and connect to previous block
|
||||
m_Graph.m_BlockStorage.push_back(shared_ptr<BasicBlock>(this));
|
||||
|
||||
bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second;
|
||||
ASSERT(didInsert);
|
||||
|
||||
if (psPrecedingBlockHead != NULL)
|
||||
{
|
||||
m_Preceding.insert(psPrecedingBlockHead);
|
||||
BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead);
|
||||
ASSERT(prec != 0);
|
||||
didInsert = prec->m_Succeeding.insert(psFirst).second;
|
||||
ASSERT(didInsert);
|
||||
}
|
||||
|
||||
Build();
|
||||
}
|
||||
|
||||
void BasicBlock::Build()
|
||||
{
|
||||
const Instruction *inst = m_First;
|
||||
while (inst != m_End)
|
||||
{
|
||||
// Process sources first
|
||||
ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
|
||||
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
// Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore
|
||||
if (m_VarKill.find(regIdx) != m_VarKill.end())
|
||||
continue;
|
||||
|
||||
// Add to UEVars set. Doesn't matter if it's already there.
|
||||
m_UEVar.insert(regIdx);
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Then the destination operands
|
||||
ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND,
|
||||
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Add to kill set. Dupes are fine, this is a set.
|
||||
m_VarKill.insert(regIdx);
|
||||
// Also into the downward definitions. Overwrite the previous definition in this basic block, if any
|
||||
Definition d(psInst, psOperand);
|
||||
m_DEDef[regIdx].clear();
|
||||
m_DEDef[regIdx].insert(d);
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Check for flow control instructions
|
||||
bool blockDone = false;
|
||||
switch (inst->eOpcode)
|
||||
{
|
||||
default:
|
||||
break;
|
||||
case OPCODE_RET:
|
||||
// Continue processing, in the case of unreachable code we still need to translate it properly (case 1160309)
|
||||
// blockDone = true;
|
||||
break;
|
||||
case OPCODE_RETC:
|
||||
// Basic block is done, start a next one.
|
||||
// There REALLY should be no existing blocks for this one
|
||||
ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst + 1)) == NULL);
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
|
||||
blockDone = true;
|
||||
break;
|
||||
case OPCODE_LOOP:
|
||||
case OPCODE_CASE:
|
||||
case OPCODE_ENDIF:
|
||||
case OPCODE_ENDSWITCH:
|
||||
// Not a flow control branch, but need to start a new block anyway.
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
|
||||
blockDone = true;
|
||||
break;
|
||||
|
||||
// Branches
|
||||
case OPCODE_IF:
|
||||
case OPCODE_BREAKC:
|
||||
case OPCODE_CONTINUEC:
|
||||
{
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
// The control branches to the next instruction or jumps to jumpPoint
|
||||
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
case OPCODE_SWITCH:
|
||||
{
|
||||
bool sawEndSwitch = false;
|
||||
bool needConnectToParent = false;
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (!sawEndSwitch || needConnectToParent)
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
if (sawEndSwitch)
|
||||
break;
|
||||
|
||||
// The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label
|
||||
ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT);
|
||||
jumpPoint = Utils::GetJumpPoint(jumpPoint - 1, &sawEndSwitch, &needConnectToParent);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
}
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Non-conditional jumps
|
||||
case OPCODE_BREAK:
|
||||
case OPCODE_ELSE:
|
||||
case OPCODE_CONTINUE:
|
||||
case OPCODE_ENDLOOP:
|
||||
{
|
||||
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
|
||||
ASSERT(jumpPoint != NULL);
|
||||
|
||||
AddChildBasicBlock(jumpPoint);
|
||||
|
||||
blockDone = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (blockDone)
|
||||
break;
|
||||
|
||||
inst++;
|
||||
}
|
||||
// In initial building phase, just make m_Reachable equal to m_DEDef
|
||||
m_Reachable = m_DEDef;
|
||||
|
||||
// Tag the end of the basic block
|
||||
m_Last = std::max(m_First, std::min(inst, m_End - 1));
|
||||
// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
|
||||
}
|
||||
|
||||
// Makes the block starting at psFirst a successor of this block and returns it.
// If the graph has no block there yet, one is created; the new block registers itself
// in the graph and links back to this block from its own constructor.
BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
{
    BasicBlock *existing = m_Graph.GetBasicBlockForInstruction(psFirst);
    if (existing == NULL)
        return new BasicBlock(psFirst, m_Graph, m_First, m_End);

    // Block already known: only the edge has to be recorded, in both directions.
    m_Succeeding.insert(psFirst);
    existing->m_Preceding.insert(m_First);
    return existing;
}
|
||||
|
||||
bool BasicBlock::RebuildReachable()
|
||||
{
|
||||
// Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes.
|
||||
// Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill())
|
||||
|
||||
ReachableVariables newReachable = m_DEDef;
|
||||
bool hasChanges = false;
|
||||
|
||||
// Loop each predecessor
|
||||
std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr)
|
||||
{
|
||||
const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr);
|
||||
const ReachableVariables &precReachable = prec->Reachable();
|
||||
|
||||
// Loop each variable*component
|
||||
std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair<uint32_t, BasicBlock::ReachableDefinitionsPerVariable> &itr2)
|
||||
{
|
||||
uint32_t regIdx = itr2.first;
|
||||
const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second;
|
||||
|
||||
// Already killed in this block?
|
||||
if (VarKill().find(regIdx) != VarKill().end())
|
||||
return;
|
||||
|
||||
// Only do comparisons against current definitions if we've yet to find any changes
|
||||
BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0;
|
||||
if (!hasChanges)
|
||||
currReachablePerVar = &m_Reachable[regIdx];
|
||||
|
||||
BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx];
|
||||
|
||||
// Loop each definition
|
||||
std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d)
|
||||
{
|
||||
if (!hasChanges)
|
||||
{
|
||||
// Check if already there
|
||||
if (currReachablePerVar->find(d) == currReachablePerVar->end())
|
||||
hasChanges = true;
|
||||
}
|
||||
newReachablePerVar.insert(d);
|
||||
}); // definition
|
||||
}); // variable*component
|
||||
}); // predecessor
|
||||
|
||||
if (hasChanges)
|
||||
{
|
||||
std::swap(m_Reachable, newReachable);
|
||||
}
|
||||
|
||||
return hasChanges;
|
||||
}
|
||||
|
||||
// In-place union: adds every definition of every variable in 'b' to 'a'.
// Variables that exist only in 'b' are created in 'a' (including ones whose definition set is empty).
void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
{
    // The union of a set with itself is a no-op. Bail out explicitly, because inserting a
    // container's own iterator range into itself is not allowed for associative containers.
    if (&a == &b)
        return;

    // 'const auto &' binds directly to the container elements; the previous lambda parameter
    // type (std::pair<uint32_t, ...>) presumably forced a copy of each definition set - TODO
    // confirm against the ReachableVariables typedef.
    for (const auto &rpvPair : b)
    {
        // operator[] creates an empty set when 'a' has no entry for this variable yet, so the
        // range insert covers both the "new variable" and the "merge" case.
        a[rpvPair.first].insert(rpvPair.second.begin(), rpvPair.second.end());
    }
}
|
||||
|
||||
#if ENABLE_UNIT_TESTS
|
||||
|
||||
#define UNITY_EXTERNAL_TOOL 1
|
||||
#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS
|
||||
#include "Testing.h" // From Runtime/Testing
|
||||
|
||||
UNIT_TEST_SUITE(HLSLcc)
|
||||
{
|
||||
TEST(ControlFlowGraph_Build_Simple_Works)
{
    // Straight-line program: a single full-width temp write followed by RET.
    Instruction inst[] =
    {
        // MOV t0.xyzw, I0.xyzw
        Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf),
        Instruction(1, OPCODE_RET)
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));

    // One block covering both instructions, with no neighbours
    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[1], root.Last());
    CHECK(root.Preceding().empty());
    CHECK(root.Succeeding().empty());

    // All four components of t0 are killed, each one defined by the MOV's destination operand
    CHECK_EQUAL(4, root.VarKill().size());
    for (uint32_t comp = 0; comp < 4; ++comp)
    {
        CHECK_EQUAL(1, root.VarKill().count(comp));

        const BasicBlock::Definition &def = *root.DEDef().find(comp)->second.begin();
        CHECK_EQUAL(&inst[0], def.m_Instruction);
        CHECK_EQUAL(&inst[0].asOperands[0], def.m_Operand);
    }
}
|
||||
|
||||
TEST(ControlFlowGraph_Build_If_Works)
{
    // if/else diamond: B0 branches to B1 (if) and B2 (else), both of which join in B3.
    Instruction inst[] =
    {
        // B0
        // 0: MOV t1.xyzw, i0.xyzw
        Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf),
        // 1: MUL t0, t1, t1
        Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf),
        // 2: IF t1.y
        Instruction(2, OPCODE_IF, 1, 2),
        // B1
        // 3: MOV o0, t0
        Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf),
        // 4: ELSE
        Instruction(4, OPCODE_ELSE),
        // B2
        // 5: MOV o0, t1
        Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
        // 6: ENDIF
        Instruction(6, OPCODE_ENDIF),
        // B3
        // 7: NOP
        Instruction(7, OPCODE_NOP),
        // 8: RET
        Instruction(8, OPCODE_RET)
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));

    const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
    const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
    const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]);
    CHECK(b1 != NULL);
    CHECK(b2 != NULL);
    CHECK(b3 != NULL);

    // Instruction ranges of the four blocks
    CHECK_EQUAL(root.First(), &inst[0]);
    CHECK_EQUAL(root.Last(), &inst[2]);
    CHECK_EQUAL(&inst[3], b1->First());
    CHECK_EQUAL(&inst[4], b1->Last());
    CHECK_EQUAL(&inst[5], b2->First());
    CHECK_EQUAL(&inst[6], b2->Last());
    CHECK_EQUAL(&inst[7], b3->First());
    CHECK_EQUAL(&inst[8], b3->Last());

    // Edges: B0 -> {B1, B2}, B1 -> B3, B2 -> B3 (and no direct B0 -> B3)
    CHECK(root.Preceding().empty());
    CHECK_EQUAL(2, root.Succeeding().size());
    CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[5]));

    CHECK_EQUAL(1, b1->Preceding().size());
    CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));

    CHECK_EQUAL(1, b2->Preceding().size());
    CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));

    CHECK_EQUAL(2, b3->Preceding().size());
    CHECK_EQUAL(0, b3->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
    CHECK_EQUAL(1, b3->Preceding().count(&inst[5]));

    for (uint32_t comp = 0; comp < 4; ++comp)
    {
        // The if block must have upwards-exposed t0 (indices 0..3)
        CHECK_EQUAL(1, b1->UEVar().count(comp));
        // The else block must have upwards-exposed t1 (indices 4..7)
        CHECK_EQUAL(1, b2->UEVar().count(4 + comp));
    }

    // The root block kills every component of both t0 and t1
    CHECK_EQUAL(8, root.VarKill().size());
    for (uint32_t regIdx = 0; regIdx < 8; ++regIdx)
    {
        CHECK_EQUAL(1, root.VarKill().count(regIdx));
    }

    // The expected downwards-exposed definitions:
    // B0: t0, t1
    // B1-B3: none
    CHECK_EQUAL(8, root.DEDef().size());
    CHECK_EQUAL(0, b1->DEDef().size());
    CHECK_EQUAL(0, b2->DEDef().size());
    CHECK_EQUAL(0, b3->DEDef().size());

    // Nothing after B0 defines a temp, so B0's definitions reach every block unchanged
    CHECK(root.DEDef() == root.Reachable());
    CHECK(root.Reachable() == b1->Reachable());
    CHECK(root.Reachable() == b2->Reachable());
    CHECK(root.Reachable() == b3->Reachable());
}
|
||||
|
||||
TEST(ControlFlowGraph_Build_SwitchCase_Works)
{
    // Switch with a fall-through case (B1 -> B2), a BREAK (B2), a conditional break (B3)
    // and a last case running into ENDSWITCH (B4). B5 is the code after the switch.
    Instruction inst[] =
    {
        // Start B0
        // i0: MOV t0.x, I0.x
        Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
        // i1: MOV t1.xyz, I0.yzw
        Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe),
        // i2: MOV t1.w, t0.x
        Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1),
        // i3: MOV t2, I0
        Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf),
        // i4: SWITCH t0.y
        Instruction(4, OPCODE_SWITCH, 1, 2),
        // End B0
        // i5: CASE
        Instruction(5, OPCODE_CASE),
        // i6: DEFAULT
        Instruction(6, OPCODE_DEFAULT),
        // Start B1
        // i7: MOV t1.z, t0.x
        Instruction(7, OPCODE_MOV, 1, 4, 0, 1),
        // i8: CASE
        Instruction(8, OPCODE_CASE),
        // End B1
        // Start B2
        // i9: MOV t1.z, t2.x
        Instruction(9, OPCODE_MOV, 1, 4, 2, 1),
        // i10: BREAK
        Instruction(10, OPCODE_BREAK),
        // End B2
        // i11: CASE
        Instruction(11, OPCODE_CASE),
        // Start B3
        // i12: MOV t1.z, t2.y
        Instruction(12, OPCODE_MOV, 1, 4, 2, 2),
        // i13: BREAKC t0.x
        Instruction(13, OPCODE_BREAKC, 0, 1),
        // End B3
        // i14: CASE
        Instruction(14, OPCODE_CASE),
        // Start B4
        // i15: MOV t1.z, t2.z
        Instruction(15, OPCODE_MOV, 1, 4, 2, 4),
        // i16: ENDSWITCH
        Instruction(16, OPCODE_ENDSWITCH),
        // End B4
        // Start B5
        // i17: MOV o0, t1
        Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
        // i18: RET
        Instruction(18, OPCODE_RET)
        // End B5
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));

    const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]);
    const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]);
    const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]);
    const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]);
    const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]);

    CHECK(b1 != NULL);
    CHECK(b2 != NULL);
    CHECK(b3 != NULL);
    CHECK(b4 != NULL);
    CHECK(b5 != NULL);

    // Instruction ranges
    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[4], root.Last());
    CHECK_EQUAL(&inst[8], b1->Last());
    CHECK_EQUAL(&inst[10], b2->Last());
    CHECK_EQUAL(&inst[13], b3->Last());
    CHECK_EQUAL(&inst[16], b4->Last());
    CHECK_EQUAL(&inst[18], b5->Last());

    // Nothing before the root, nothing after B5
    CHECK(root.Preceding().empty());
    CHECK(b5->Succeeding().empty());

    // Every expected connection must exist, and no others.

    // B0 -> B1, B2, B3, B4
    CHECK_EQUAL(4, root.Succeeding().size());
    CHECK_EQUAL(1, root.Succeeding().count(&inst[7]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[9]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[12]));
    CHECK_EQUAL(1, root.Succeeding().count(&inst[15]));

    // B1: entered from B0 only, falls through into B2
    CHECK_EQUAL(1, b1->Succeeding().size());
    CHECK_EQUAL(1, b1->Succeeding().count(&inst[9]));
    CHECK_EQUAL(1, b1->Preceding().size());
    CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));

    // B2: entered from B0 and B1, breaks out to B5
    CHECK_EQUAL(1, b2->Succeeding().size());
    CHECK_EQUAL(1, b2->Succeeding().count(&inst[17]));
    CHECK_EQUAL(2, b2->Preceding().size());
    CHECK_EQUAL(1, b2->Preceding().count(&inst[7]));
    CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));

    // B3: entered from B0 only, continues to B4 or breaks out to B5
    CHECK_EQUAL(2, b3->Succeeding().size());
    CHECK_EQUAL(1, b3->Succeeding().count(&inst[15]));
    CHECK_EQUAL(1, b3->Succeeding().count(&inst[17]));
    CHECK_EQUAL(1, b3->Preceding().size());
    CHECK_EQUAL(1, b3->Preceding().count(&inst[0]));

    // B4: two predecessors (B0 among them), runs into B5
    CHECK_EQUAL(1, b4->Succeeding().size());
    CHECK_EQUAL(1, b4->Succeeding().count(&inst[17]));
    CHECK_EQUAL(2, b4->Preceding().size());
    CHECK_EQUAL(1, b4->Preceding().count(&inst[0]));

    // B5: reached from B2, B3 and B4
    CHECK_EQUAL(0, b5->Succeeding().size());
    CHECK_EQUAL(3, b5->Preceding().size());
    CHECK_EQUAL(1, b5->Preceding().count(&inst[9]));
    CHECK_EQUAL(1, b5->Preceding().count(&inst[12]));
    CHECK_EQUAL(1, b5->Preceding().count(&inst[15]));

    // Verify reachable sets

    CHECK(root.Reachable() == root.DEDef());
    CHECK_EQUAL(9, root.Reachable().size());

    // B5 should have these reachables:
    // t0.x only from b0
    // t1.xy from b0, i1
    // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2)
    // t1.w from b0, i2
    // t2.xyzw from b0, i3

    // Drop constness so operator[] can be used. Note that operator[] creates empty entries
    // for missing keys, so the total size has to be checked before any lookup.
    BasicBlock::ReachableVariables &r = const_cast<BasicBlock::ReachableVariables &>(b5->Reachable());
    CHECK_EQUAL(9, r.size());

    // t0: only .x has a (single) definition, coming from i0
    CHECK_EQUAL(1, r[0].size());
    for (uint32_t regIdx = 1; regIdx < 4; ++regIdx)
    {
        CHECK_EQUAL(0, r[regIdx].size());
    }
    CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction);

    // t1: one definition each for .x, .y and .w, three candidates for .z
    CHECK_EQUAL(1, r[4].size());
    CHECK_EQUAL(1, r[5].size());
    CHECK_EQUAL(3, r[6].size());
    CHECK_EQUAL(1, r[7].size());

    BasicBlock::ReachableDefinitionsPerVariable expectedT1z;
    expectedT1z.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0]));
    expectedT1z.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0]));
    expectedT1z.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0]));
    const BasicBlock::ReachableDefinitionsPerVariable &actualT1z = r[6];
    CHECK(expectedT1z == actualT1z);

    // t2: one definition per component
    for (uint32_t regIdx = 8; regIdx < 12; ++regIdx)
    {
        CHECK_EQUAL(1, r[regIdx].size());
    }
}
|
||||
|
||||
TEST(ControlFlowGraph_Build_Loop_Works)
{
    // Loop with a conditional break: B0 -> B1 -> {B2, B3}, and B2 jumps back to B1.
    Instruction inst[] =
    {
        // Start B0
        // i0: MOV t0.x, I0.x
        Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
        // i1: MOV t1.xy, I0.zw // The .x definition should not make it past the loop, .y should.
        Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc),
        // i2: LOOP
        Instruction(2, OPCODE_LOOP, 1, 2),
        // End B0 -> B1
        // Begin B1
        // i3: MOV t1.x, t0.x
        Instruction(3, OPCODE_MOV, 1, 1, 0, 1),
        // i4: BREAKC t0.x
        Instruction(4, OPCODE_BREAKC, 0, 1),
        // End B1 -> B2, B3
        // Begin B2
        // i5: ADD t0.x, t0.y
        Instruction(5, OPCODE_ADD, 0, 1, 0, 2),
        // i6: MOV t1.x, t0.x // This should never show up as definition
        Instruction(6, OPCODE_MOV, 1, 1, 0, 1),
        // i7: ENDLOOP
        Instruction(7, OPCODE_ENDLOOP),
        // End B2 -> B1
        // Start B3
        // i8: MOV O0.x, t1.x
        Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1),
        // i9: RET
        Instruction(9, OPCODE_RET),
        // End B3
    };

    ControlFlowGraph cfg;
    const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));

    const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
    const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
    const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]);

    CHECK(b1 != NULL);
    CHECK(b2 != NULL);
    CHECK(b3 != NULL);

    // Instruction ranges
    CHECK_EQUAL(&inst[0], root.First());
    CHECK_EQUAL(&inst[2], root.Last());
    CHECK_EQUAL(&inst[4], b1->Last());
    CHECK_EQUAL(&inst[7], b2->Last());
    CHECK_EQUAL(&inst[9], b3->Last());

    // Nothing before the root, nothing after B3
    CHECK(root.Preceding().empty());
    CHECK(b3->Succeeding().empty());

    // Every expected connection must exist, and no others.

    // B0 -> B1
    CHECK_EQUAL(1, root.Succeeding().size());
    CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));

    // B1 -> B2 and B1 -> B3; B1 is entered from B0 and, via the back edge, from B2
    CHECK_EQUAL(2, b1->Succeeding().size());
    CHECK_EQUAL(1, b1->Succeeding().count(&inst[5]));
    CHECK_EQUAL(1, b1->Succeeding().count(&inst[8]));
    CHECK_EQUAL(2, b1->Preceding().size());
    CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
    CHECK_EQUAL(1, b1->Preceding().count(&inst[5]));

    // B2 -> B1; B2 is entered from B1 only
    CHECK_EQUAL(1, b2->Succeeding().size());
    CHECK_EQUAL(1, b2->Succeeding().count(&inst[3]));
    CHECK_EQUAL(1, b2->Preceding().size());
    CHECK_EQUAL(1, b2->Preceding().count(&inst[3]));

    // B3 is entered from B1 only
    CHECK_EQUAL(1, b3->Preceding().size());
    CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));

    // Verify downwards-exposed definitions and reachable sets.
    // Keys are register * 4 + component: 0 = t0.x, 4 = t1.x, 5 = t1.y.

    // Definition made by the destination operand of instruction 'idx'
    auto defAt = [&inst](int idx)
    {
        return BasicBlock::Definition(&inst[idx], &inst[idx].asOperands[0]);
    };

    // B0
    {
        BasicBlock::ReachableVariables expected;
        expected[0].insert(defAt(0));
        expected[4].insert(defAt(1));
        expected[5].insert(defAt(1));

        CHECK(root.DEDef() == expected);
        CHECK(root.Reachable() == root.DEDef());
    }

    // B1
    {
        BasicBlock::ReachableVariables expected;
        expected[4].insert(defAt(3));
        CHECK(b1->DEDef() == expected);

        expected = b1->DEDef();
        // t0.x from i0, t1.y (but not .x) from i1
        expected[0].insert(defAt(0));
        expected[5].insert(defAt(1));
        // t0.x from i5, but nothing from i6
        expected[0].insert(defAt(5));
        CHECK(b1->Reachable() == expected);
    }

    // B2
    {
        BasicBlock::ReachableVariables expected;
        expected[0].insert(defAt(5));
        expected[4].insert(defAt(6));
        CHECK(b2->DEDef() == expected);

        expected = b2->DEDef();
        expected[5].insert(defAt(1));
        CHECK(b2->Reachable() == expected);
    }

    // B3
    {
        BasicBlock::ReachableVariables expected;
        CHECK(b3->DEDef() == expected);

        // t0.x from i0, t1.y from i1
        expected[0].insert(defAt(0));
        expected[5].insert(defAt(1));
        // t1.x from i3
        expected[4].insert(defAt(3));
        // t0.x from i5
        expected[0].insert(defAt(5));

        CHECK(b3->Reachable() == expected);
    }
}
|
||||
}
|
||||
|
||||
#endif
|
116
third_party/HLSLcc/src/ControlFlowGraphUtils.cpp
vendored
Normal file
116
third_party/HLSLcc/src/ControlFlowGraphUtils.cpp
vendored
Normal file
@ -0,0 +1,116 @@
|
||||
#include "ControlFlowGraphUtils.h"
|
||||
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
|
||||
// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH
|
||||
const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/)
|
||||
{
|
||||
const Instruction *inst = psStart;
|
||||
// Skip CASE/DEFAULT/ENDSWITCH/LOOP labels
|
||||
while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP)
|
||||
{
|
||||
// We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it)
|
||||
ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL);
|
||||
if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL)
|
||||
*sawEndSwitch = true;
|
||||
inst++;
|
||||
}
|
||||
return inst;
|
||||
}
|
||||
|
||||
// For a given flow-control instruction, find the corresponding jump location:
|
||||
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
|
||||
// For ELSE, find same level ENDIF + 1
|
||||
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
|
||||
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
|
||||
// For ENDLOOP, find previous same-level LOOP + 1
|
||||
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
|
||||
// For CONTINUE/C the previous LOOP + 1
|
||||
// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
|
||||
// Note that CASE labels fall through.
|
||||
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
|
||||
// Finds the instruction that control flow transfers to from the branching instruction psStart.
//
// psStart must be one of IF, ELSE, BREAK, BREAKC, SWITCH, CASE, DEFAULT, ENDLOOP, CONTINUE or
// CONTINUEC (asserted). The instruction stream is scanned with a nesting counter so that only
// the construct psStart belongs to is matched:
//   * IF / ELSE                    : forward to the matching ELSE or ENDIF.
//   * BREAK / BREAKC               : forward to the enclosing ENDLOOP or ENDSWITCH.
//   * CONTINUE / CONTINUEC / ENDLOOP : backward to the enclosing LOOP.
//   * SWITCH / CASE / DEFAULT      : forward to the next CASE, DEFAULT or ENDSWITCH of the same switch.
// The result is GetNextNonLabelInstruction() applied to the instruction following the match.
//
// sawEndSwitch        - optional; forwarded to GetNextNonLabelInstruction for all but the switch
//                       family, where it is instead set to true when the match is an ENDSWITCH.
// needConnectToParent - optional; forwarded to GetNextNonLabelInstruction for the switch family only.
//
// Returns 0 only if psStart has an unsupported opcode (after asserting).
// No bounds checking is done here; the scan relies on the stream being well-formed.
const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/)
{
    const OPCODE_TYPE startOp = psStart->eOpcode;
    ASSERT(startOp == OPCODE_IF || startOp == OPCODE_ELSE || startOp == OPCODE_BREAK || startOp == OPCODE_BREAKC
        || startOp == OPCODE_SWITCH || startOp == OPCODE_CASE || startOp == OPCODE_DEFAULT
        || startOp == OPCODE_ENDLOOP || startOp == OPCODE_CONTINUE || startOp == OPCODE_CONTINUEC);

    const Instruction *cursor = psStart;
    int nesting = 0; // number of nested constructs of the same family currently open

    switch (startOp)
    {
        case OPCODE_IF:
        case OPCODE_ELSE:
            for (;;)
            {
                ++cursor;
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (nesting == 0 && (cur == OPCODE_ELSE || cur == OPCODE_ENDIF))
                    return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);

                if (cur == OPCODE_IF)
                    ++nesting;
                else if (cur == OPCODE_ENDIF)
                    --nesting;
            }

        case OPCODE_BREAK:
        case OPCODE_BREAKC:
            for (;;)
            {
                ++cursor;
                const OPCODE_TYPE cur = cursor->eOpcode;
                const bool closesScope = (cur == OPCODE_ENDLOOP || cur == OPCODE_ENDSWITCH);
                if (closesScope && nesting == 0)
                    return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);

                if (cur == OPCODE_SWITCH || cur == OPCODE_LOOP)
                    ++nesting;
                else if (closesScope)
                    --nesting;
            }

        case OPCODE_CONTINUE:
        case OPCODE_CONTINUEC:
        case OPCODE_ENDLOOP:
            // Walk backwards; an ENDLOOP met on the way opens a nested loop (seen in reverse).
            for (;;)
            {
                --cursor;
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (cur == OPCODE_LOOP)
                {
                    if (nesting == 0)
                        return GetNextNonLabelInstruction(cursor + 1, sawEndSwitch);
                    --nesting;
                }
                else if (cur == OPCODE_ENDLOOP)
                {
                    ++nesting;
                }
            }

        case OPCODE_SWITCH:
        case OPCODE_CASE:
        case OPCODE_DEFAULT:
            for (;;)
            {
                ++cursor;
                const OPCODE_TYPE cur = cursor->eOpcode;
                if (nesting == 0 && (cur == OPCODE_CASE || cur == OPCODE_DEFAULT || cur == OPCODE_ENDSWITCH))
                {
                    // sawEndSwitch is deliberately NOT forwarded to GetNextNonLabelInstruction here;
                    // it is only raised when the terminator itself is ENDSWITCH. Per the original note
                    // this lets BasicBlock::Build distinguish between there being a direct route
                    // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not.
                    if (cur == OPCODE_ENDSWITCH && sawEndSwitch != 0)
                        *sawEndSwitch = true;

                    return GetNextNonLabelInstruction(cursor + 1, needConnectToParent);
                }

                if (cur == OPCODE_SWITCH)
                    ++nesting;
                else if (cur == OPCODE_ENDSWITCH)
                    --nesting;
            }

        default:
            ASSERT(0);
            break;
    }
    return 0;
}
|
777
third_party/HLSLcc/src/DataTypeAnalysis.cpp
vendored
Normal file
777
third_party/HLSLcc/src/DataTypeAnalysis.cpp
vendored
Normal file
@ -0,0 +1,777 @@
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/DataTypeAnalysis.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
// Helper function to set the vector type of 1 or more components in a vector
|
||||
// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done
|
||||
// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to
|
||||
// to the "highest" type value (ordering int->uint->float)
|
||||
static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
// Expand the mask to include all components that are used, also upgrade type
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (aeTempVecType[regBaseIndex + i] != SVT_VOID)
|
||||
{
|
||||
componentMask |= (1 << i);
|
||||
eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Now componentMask contains the components we actually need to update and eType may have been changed to something else.
|
||||
// Write the results
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (componentMask & (1 << i))
|
||||
{
|
||||
if (aeTempVecType[regBaseIndex + i] != eType)
|
||||
{
|
||||
aeTempVecType[regBaseIndex + i] = eType;
|
||||
if (psMadeProgress)
|
||||
*psMadeProgress = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Maps an operand's minimum-precision qualifier to the matching shader variable type.
//
// prec     - minimum precision declared on the operand.
// eDefault - type to return when prec is OPERAND_MIN_PRECISION_DEFAULT (no qualifier).
//
// Unknown precision values trigger an ASSERT and also yield eDefault.
static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
{
    switch (prec)
    {
        case OPERAND_MIN_PRECISION_SINT_16:
            return SVT_INT16;
        case OPERAND_MIN_PRECISION_UINT_16:
            return SVT_UINT16;
        case OPERAND_MIN_PRECISION_FLOAT_2_8:
            return SVT_FLOAT10;
        case OPERAND_MIN_PRECISION_FLOAT_16:
            return SVT_FLOAT16;
        case OPERAND_MIN_PRECISION_DEFAULT:
            return eDefault;
        default:
            ASSERT(0); // Catch this to see what's going on.
            return eDefault;
    }
}
|
||||
|
||||
// Records eType for the components a temp-register operand accesses.
//
// Operands that are not OPERAND_TYPE_TEMP are ignored. The requested type is first replaced by
// the operand's min-precision type when the operand carries a precision qualifier, then merged
// into the table through SetVectorType (no progress reporting).
static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    if (psOperand->eType != OPERAND_TYPE_TEMP)
        return;

    // Four table entries per register.
    const uint32_t tableBase = psOperand->ui32RegisterNumber * 4;
    const uint32_t accessMask = psOperand->GetAccessMask();

    // A precision qualifier on the operand overrides the caller-supplied type.
    const SHADER_VARIABLE_TYPE resolvedType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);

    SetVectorType(aeTempVecType, tableBase, accessMask, resolvedType, NULL);
}
|
||||
|
||||
// Applies MarkOperandAs with the same type to every operand of the instruction.
static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
    for (uint32_t operandIdx = 0; operandIdx < psInst->ui32NumOperands; ++operandIdx)
        MarkOperandAs(&psInst->asOperands[operandIdx], eType, aeTempVecType);
}
|
||||
|
||||
// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
|
||||
static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
|
||||
{
|
||||
const ConstantBuffer* psCBuf = NULL;
|
||||
const ShaderVarType* psVarType = NULL;
|
||||
int32_t rebase = 0;
|
||||
bool isArray;
|
||||
|
||||
if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
|
||||
return;
|
||||
|
||||
// Ignore selection modes that access more than one component
|
||||
switch (psOperand->eSelMode)
|
||||
{
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
if (!psOperand->IsSwizzleReplicated())
|
||||
return;
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
return;
|
||||
}
|
||||
|
||||
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
|
||||
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
|
||||
|
||||
if (psVarType->Class == SVC_SCALAR)
|
||||
psOperand->iNumComponents = 1;
|
||||
}
|
||||
|
||||
struct SetPartialDataTypes
|
||||
{
|
||||
SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
|
||||
: m_TempVec(_aeTempVec)
|
||||
{}
|
||||
SHADER_VARIABLE_TYPE *m_TempVec;
|
||||
|
||||
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
|
||||
{
|
||||
uint32_t mask = 0;
|
||||
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
|
||||
SHADER_VARIABLE_TYPE newType;
|
||||
uint32_t i, reg;
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
if (ui32OperandType == FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
// We really shouldn't ever be getting minprecision float indices here
|
||||
ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
|
||||
|
||||
mask = psOperand->GetAccessMask();
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
if (aeTempVecType[reg * 4 + i] == SVT_VOID)
|
||||
aeTempVecType[reg * 4 + i] = newType;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return;
|
||||
|
||||
mask = psOperand->GetAccessMask();
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID);
|
||||
ASSERT(newType != SVT_VOID);
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
aeTempVecType[reg * 4 + i] = newType;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Write back the temp datatypes into operands. Also mark scalars in constant buffers
|
||||
|
||||
struct WritebackDataTypes
|
||||
{
|
||||
WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
|
||||
: m_Context(_ctx)
|
||||
, m_TempVec(_aeTempVec)
|
||||
{}
|
||||
HLSLCrossCompilerContext *m_Context;
|
||||
SHADER_VARIABLE_TYPE *m_TempVec;
|
||||
|
||||
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
|
||||
{
|
||||
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
|
||||
uint32_t reg, mask, i;
|
||||
SHADER_VARIABLE_TYPE dtype;
|
||||
|
||||
if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
|
||||
SetCBOperandComponents(m_Context, psOperand);
|
||||
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
reg = psOperand->ui32RegisterNumber;
|
||||
mask = psOperand->GetAccessMask();
|
||||
dtype = SVT_VOID;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (!(mask & (1 << i)))
|
||||
continue;
|
||||
|
||||
// Check that all components have the same type
|
||||
ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]);
|
||||
|
||||
dtype = aeTempVecType[reg * 4 + i];
|
||||
|
||||
ASSERT(dtype != SVT_VOID);
|
||||
ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype));
|
||||
|
||||
psOperand->aeDataType[i] = dtype;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Deduces a data type for every component of every temp register used by `instructions`
// and writes the result both into `results` and into the operands themselves.
//
// psContext     - translation context (used for operand type queries and CB lookups).
// instructions  - instruction stream to analyse; operands' aeDataType fields are updated.
// ui32TempCount - number of temp registers; `results` gets ui32TempCount * 4 entries.
// results       - output table, four entries (x,y,z,w) per temp register.
//
// Steps:
//   1. Seed the table from min-precision qualifiers and index sub-operands (SetPartialDataTypes).
//   2. Mark operands according to what each opcode requires.
//   3. Iterate over MOV/MOVC until stable, propagating types between source and destination.
//   4. Collapse SVT_FORCED_INT / SVT_INT_AMBIGUOUS to SVT_INT and write types back to operands.
//   5. Iterate over AND/OR until stable, turning results of bool-only operations into bool.
//
// With ui32TempCount == 0 the table is simply emptied and nothing else happens.
void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
{
    // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
    std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;

    aeTempVecType.clear();
    aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);

    if (ui32TempCount == 0)
        return;

    // Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
    // Also set all suboperands to be integers (they're always used as indices)
    ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0]));

    // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table
    // Only ever to int->float promotion (or int->uint), never the other way around
    //
    // The stream is walked with range-for rather than a pointer derived from &instructions[0],
    // which would be undefined behaviour for an empty instruction vector.
    for (Instruction &inst : instructions)
    {
        Instruction *psInst = &inst;

        if (psInst->ui32NumOperands == 0)
            continue;
#ifdef _DEBUG
        for (int k = 0; k < (int)psInst->ui32NumOperands; k++)
        {
            if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP)
            {
                ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount);
            }
        }
#endif

        switch (psInst->eOpcode)
        {
            // All float-only ops
            case OPCODE_ADD:
            case OPCODE_DERIV_RTX:
            case OPCODE_DERIV_RTY:
            case OPCODE_DIV:
            case OPCODE_DP2:
            case OPCODE_DP3:
            case OPCODE_DP4:
            case OPCODE_EXP:
            case OPCODE_FRC:
            case OPCODE_LOG:
            case OPCODE_MAD:
            case OPCODE_MIN:
            case OPCODE_MAX:
            case OPCODE_MUL:
            case OPCODE_ROUND_NE:
            case OPCODE_ROUND_NI:
            case OPCODE_ROUND_PI:
            case OPCODE_ROUND_Z:
            case OPCODE_RSQ:
            case OPCODE_SAMPLE:
            case OPCODE_SAMPLE_C:
            case OPCODE_SAMPLE_C_LZ:
            case OPCODE_SAMPLE_L:
            case OPCODE_SAMPLE_D:
            case OPCODE_SAMPLE_B:
            case OPCODE_SQRT:
            case OPCODE_SINCOS:
            case OPCODE_LOD:
            case OPCODE_GATHER4:

            case OPCODE_DERIV_RTX_COARSE:
            case OPCODE_DERIV_RTX_FINE:
            case OPCODE_DERIV_RTY_COARSE:
            case OPCODE_DERIV_RTY_FINE:
            case OPCODE_GATHER4_C:
            case OPCODE_GATHER4_PO:
            case OPCODE_GATHER4_PO_C:
            case OPCODE_RCP:

                MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
                break;

            // Comparison ops, need to enable possibility for going boolean
            case OPCODE_IEQ:
            case OPCODE_INE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
                break;

            // Conditional control flow: operand 0 is the condition
            case OPCODE_IF:
            case OPCODE_BREAKC:
            case OPCODE_CALLC:
            case OPCODE_CONTINUEC:
            case OPCODE_RETC:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_ILT:
            case OPCODE_IGE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                break;

            case OPCODE_ULT:
            case OPCODE_UGE:
                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
                break;

            case OPCODE_AND:
            case OPCODE_OR:
                MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
                break;

            // Integer ops that don't care of signedness
            case OPCODE_IADD:
            case OPCODE_INEG:
            case OPCODE_ISHL:
            case OPCODE_NOT:
            case OPCODE_XOR:
            case OPCODE_BUFINFO:
            case OPCODE_COUNTBITS:
            case OPCODE_FIRSTBIT_HI:
            case OPCODE_FIRSTBIT_LO:
            case OPCODE_FIRSTBIT_SHI:
            case OPCODE_BFI:
            case OPCODE_BFREV:
            case OPCODE_ATOMIC_AND:
            case OPCODE_ATOMIC_OR:
            case OPCODE_ATOMIC_XOR:
            case OPCODE_ATOMIC_CMP_STORE:
            case OPCODE_ATOMIC_IADD:
            case OPCODE_IMM_ATOMIC_IADD:
            case OPCODE_IMM_ATOMIC_AND:
            case OPCODE_IMM_ATOMIC_OR:
            case OPCODE_IMM_ATOMIC_XOR:
            case OPCODE_IMM_ATOMIC_EXCH:
            case OPCODE_IMM_ATOMIC_CMP_EXCH:

                MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType);
                break;

            // Integer ops
            case OPCODE_IMAD:
            case OPCODE_IMAX:
            case OPCODE_IMIN:
            case OPCODE_IMUL:
            case OPCODE_ISHR:
            case OPCODE_IBFE:

            case OPCODE_ATOMIC_IMAX:
            case OPCODE_ATOMIC_IMIN:
            case OPCODE_IMM_ATOMIC_IMAX:
            case OPCODE_IMM_ATOMIC_IMIN:
                MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType);
                break;

            // uint ops
            case OPCODE_UDIV:
            case OPCODE_UMUL:
            case OPCODE_UMAD:
            case OPCODE_UMAX:
            case OPCODE_UMIN:
            case OPCODE_USHR:
            case OPCODE_UADDC:
            case OPCODE_USUBB:
            case OPCODE_ATOMIC_UMAX:
            case OPCODE_ATOMIC_UMIN:
            case OPCODE_IMM_ATOMIC_UMAX:
            case OPCODE_IMM_ATOMIC_UMIN:
            case OPCODE_IMM_ATOMIC_ALLOC:
            case OPCODE_IMM_ATOMIC_CONSUME:
                MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType);
                break;

            case OPCODE_UBFE:
                MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType);
                break;

            // Need special handling
            case OPCODE_FTOI:
            case OPCODE_FTOU:
                MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_GE:
            case OPCODE_LT:
            case OPCODE_EQ:
            case OPCODE_NE:

                MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_ITOF:
            case OPCODE_UTOF:
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType);
                break;

            case OPCODE_LD:
            case OPCODE_LD_MS:
            {
                // The destination takes the type already recorded on operand 2.
                SHADER_VARIABLE_TYPE samplerReturnType = psInst->asOperands[2].aeDataType[0];
                MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                break;
            }

            case OPCODE_MOVC:
                MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_SWAPC:
                MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
                break;

            case OPCODE_RESINFO:
                // Operand 0 depends on the return type declaration, op 1 is always uint
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                switch (psInst->eResInfoReturnType)
                {
                    default:
                    case RESINFO_INSTRUCTION_RETURN_FLOAT:
                    case RESINFO_INSTRUCTION_RETURN_RCPFLOAT:
                        MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                        break;
                    case RESINFO_INSTRUCTION_RETURN_UINT:
                        MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                        break;
                }
                break;

            case OPCODE_SAMPLE_INFO:
                // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint.
                MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType);
                break;

            case OPCODE_SAMPLE_POS:
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_LD_UAV_TYPED:
                // translates to gvec4 loadImage(gimage i, ivec p).
                MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
                break;

            case OPCODE_STORE_UAV_TYPED:
                // translates to storeImage(gimage i, ivec p, gvec4 data)
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data
                break;

            case OPCODE_LD_RAW:
                if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
                    MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                else
                    MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                break;

            case OPCODE_STORE_RAW:
                if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
                    MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                else
                    MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                break;

            case OPCODE_LD_STRUCTURED:
                MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                break;

            case OPCODE_STORE_STRUCTURED:
                MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType);
                break;

            case OPCODE_F32TOF16:
                MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
                break;

            case OPCODE_F16TOF32:
                MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
                MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
                break;

            // Everything else is intentionally left unmarked here: operand-less control-flow
            // opcodes (which never get this far because of the operand-count check above),
            // DCL_* declarations, hull-shader phase markers, stream/emit/cut, sync, eval,
            // abort/debug-break and the double-precision opcodes (doubles are not supported).
            // MOV and MOVC destinations are resolved by the propagation pass below.
            default:
                break;
        }
    }

    {
        int madeProgress = 0;
        // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have
        do
        {
            madeProgress = 0;
            for (Instruction &inst : instructions)
            {
                if (inst.eOpcode != OPCODE_MOV && inst.eOpcode != OPCODE_MOVC)
                    continue;

                const bool isMovc = (inst.eOpcode == OPCODE_MOVC);

                // Figure out the data type
                SHADER_VARIABLE_TYPE dataType = SVT_VOID;
                int foundImmediate = 0;
                for (uint32_t k = 0; k < inst.ui32NumOperands; k++)
                {
                    if (isMovc && k == 1)
                        continue; // Ignore the condition operand, it's always int

                    Operand &operand = inst.asOperands[k];

                    if (operand.eType == OPERAND_TYPE_IMMEDIATE32)
                    {
                        foundImmediate = 1;
                        continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed
                    }

                    if (operand.eType != OPERAND_TYPE_TEMP)
                    {
                        // A non-temp, non-immediate operand decides the type outright.
                        dataType = operand.GetDataType(psContext);
                        break;
                    }

                    if (operand.eModifier != OPERAND_MODIFIER_NONE)
                    {
                        // If any modifiers are used in MOV or MOVC, that automatically is treated as float.
                        dataType = SVT_FLOAT;
                        break;
                    }

                    // Temp operand: fold in whatever types its accessed components already have.
                    const uint32_t mask = operand.GetAccessMask();
                    for (uint32_t j = 0; j < 4; j++)
                    {
                        if (!(mask & (1 << j)))
                            continue;
                        const SHADER_VARIABLE_TYPE compType = aeTempVecType[operand.ui32RegisterNumber * 4 + j];
                        if (compType != SVT_VOID)
                            dataType = HLSLcc::SelectHigherType(dataType, compType);
                    }
                }

                // Use at minimum int type when any operand is immediate.
                // Allowing bool could lead into bugs like case 883080
                if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL))
                    dataType = SVT_INT;

                if (dataType == SVT_VOID)
                    continue; // nothing known yet for this instruction

                // Found data type, write to all operands
                // First adjust it to not have precision qualifiers in it
                switch (dataType)
                {
                    case SVT_FLOAT10:
                    case SVT_FLOAT16:
                        dataType = SVT_FLOAT;
                        break;
                    case SVT_INT12:
                    case SVT_INT16:
                        dataType = SVT_INT;
                        break;
                    case SVT_UINT16:
                    case SVT_UINT8:
                        dataType = SVT_UINT;
                        break;
                    default:
                        break;
                }

                for (uint32_t k = 0; k < inst.ui32NumOperands; k++)
                {
                    if (isMovc && k == 1)
                        continue; // Ignore the condition operand, it's always int

                    Operand &operand = inst.asOperands[k];
                    if (operand.eType != OPERAND_TYPE_TEMP)
                        continue;
                    // Operands with an explicit precision keep the type seeded for them earlier.
                    if (operand.eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
                        continue;

                    const uint32_t mask = operand.GetAccessMask();
                    SetVectorType(aeTempVecType, operand.ui32RegisterNumber * 4, mask, dataType, &madeProgress);
                }
            }
        }
        while (madeProgress != 0);
    }

    // translate forced_int and int_ambiguous back to int
    for (uint32_t i = 0; i < ui32TempCount * 4; i++)
    {
        if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS)
            aeTempVecType[i] = SVT_INT;
    }

    ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0]));

    // Propagate boolean data types over logical operators
    bool didProgress = false;
    do
    {
        didProgress = false;
        for (Instruction &inst : instructions)
        {
            if (inst.eOpcode != OPCODE_AND && inst.eOpcode != OPCODE_OR)
                continue;
            // Both sources must already be bool...
            if (inst.asOperands[1].GetDataType(psContext) != SVT_BOOL || inst.asOperands[2].GetDataType(psContext) != SVT_BOOL)
                continue;
            // ...and the destination must be a temp that is not bool yet.
            if (inst.asOperands[0].eType != OPERAND_TYPE_TEMP || inst.asOperands[0].GetDataType(psContext) == SVT_BOOL)
                continue;

            // Check if all uses see only this define
            bool isStandalone = true;
            for (Instruction::Use &use : inst.m_Uses)
            {
                if (use.m_Op->m_Defines.size() > 1)
                {
                    isStandalone = false;
                    break;
                }
            }

            if (!isStandalone)
                continue;

            didProgress = true;

            // Change data type of this and all uses
            Operand &dest = inst.asOperands[0];
            dest.aeDataType[0] = dest.aeDataType[1] = dest.aeDataType[2] = dest.aeDataType[3] = SVT_BOOL;
            const uint32_t reg = dest.ui32RegisterNumber;
            aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL;

            for (Instruction::Use &use : inst.m_Uses)
            {
                use.m_Op->aeDataType[0] = use.m_Op->aeDataType[1] = use.m_Op->aeDataType[2] = use.m_Op->aeDataType[3] = SVT_BOOL;
            }
        }
    }
    while (didProgress);
}
|
1
third_party/HLSLcc/src/Declaration.cpp
vendored
Normal file
1
third_party/HLSLcc/src/Declaration.cpp
vendored
Normal file
@ -0,0 +1 @@
|
||||
#include "internal_includes/Declaration.h"
|
350
third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp
vendored
Normal file
350
third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp
vendored
Normal file
@ -0,0 +1,350 @@
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/DataTypeAnalysis.h"
|
||||
#include "internal_includes/UseDefineChains.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Translator.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "internal_includes/languages.h"
|
||||
#include "include/hlslcc.h"
|
||||
#include <sstream>
|
||||
|
||||
// Runs temp-register splitting and data type analysis for one shader phase.
//
// The temp count is taken from the phase's first DCL_TEMPS declaration; if there is none (or it
// declares zero temps) the phase's temp bookkeeping is left reset and nothing else is done.
// Otherwise use-define chains are built, temps are split based on them, SetDataTypes fills
// psPhase->peTempTypes, and the DCL_TEMPS declaration is patched if splitting changed the count.
void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
{
    psPhase->psTempDeclaration = NULL;
    psPhase->ui32OrigTemps = 0;
    psPhase->ui32TotalTemps = 0;

    // Retrieve the temp decl count (first DCL_TEMPS wins)
    for (auto &decl : psPhase->psDecl)
    {
        if (decl.eOpcode != OPCODE_DCL_TEMPS)
            continue;

        psPhase->ui32TotalTemps = decl.value.ui32NumTemps;
        psPhase->psTempDeclaration = &decl;
        break;
    }

    if (psPhase->ui32TotalTemps == 0)
        return;

    // Remember the declared count; splitting below may raise ui32TotalTemps.
    psPhase->ui32OrigTemps = psPhase->ui32TotalTemps;

    // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff
    // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
    psPhase->pui32SplitInfo.clear();
    psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff);

    // Build use-define chains and split temps based on those.
    {
        DefineUseChains duChains;
        UseDefineChains udChains;

        BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG());
        CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps);

        // Only do sampler precision downgrade with pixel shaders on mobile targets / Switch
        const bool isPixelShader = (psShader->eShaderType == PIXEL_SHADER);
        if (isPixelShader && (IsMobileTarget(this) || IsSwitch()))
            UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps);

        UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo);
        WriteBackUsesAndDefines(duChains);
    }

    HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes);

    // Keep the DCL_TEMPS declaration in sync when the temp count changed.
    const bool tempCountChanged = (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps);
    if (psPhase->psTempDeclaration && tempCountChanged)
        psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
}
|
||||
|
||||
void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs()
|
||||
{
|
||||
if (psShader->eShaderType != PIXEL_SHADER)
|
||||
return;
|
||||
|
||||
if (!psShader->extensions->EXT_shader_framebuffer_fetch)
|
||||
return;
|
||||
|
||||
if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0)
|
||||
return;
|
||||
|
||||
if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST))
|
||||
return;
|
||||
|
||||
if (!psDependencies)
|
||||
return;
|
||||
|
||||
if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) &&
|
||||
((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0))
|
||||
return;
|
||||
|
||||
// The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments
|
||||
// So here we figure out the maximum index of any inout render target and then make sure that we never use those for textures.
|
||||
int maxInOutRenderTargetIndex = -1;
|
||||
for (const Declaration& decl : psShader->asPhases[0].psDecl)
|
||||
{
|
||||
if (decl.eOpcode != OPCODE_DCL_INPUT_PS)
|
||||
continue;
|
||||
|
||||
const Operand& operand = decl.asOperands[0];
|
||||
if (!operand.iPSInOut)
|
||||
continue;
|
||||
|
||||
const ShaderInfo::InOutSignature* signature = NULL;
|
||||
if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true))
|
||||
continue;
|
||||
|
||||
const int index = signature->ui32SemanticIndex;
|
||||
if (index > maxInOutRenderTargetIndex)
|
||||
maxInOutRenderTargetIndex = index;
|
||||
}
|
||||
|
||||
if (maxInOutRenderTargetIndex >= 0)
|
||||
{
|
||||
if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture])
|
||||
psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void HLSLCrossCompilerContext::ClearDependencyData()
|
||||
{
|
||||
switch (psShader->eShaderType)
|
||||
{
|
||||
case PIXEL_SHADER:
|
||||
{
|
||||
psDependencies->ClearCrossDependencyData();
|
||||
break;
|
||||
}
|
||||
case HULL_SHADER:
|
||||
{
|
||||
psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
|
||||
psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void HLSLCrossCompilerContext::AddIndentation()
|
||||
{
|
||||
int i;
|
||||
bstring glsl = *currentGLSLString;
|
||||
for (i = 0; i < indent; ++i)
|
||||
{
|
||||
bcatcstr(glsl, " ");
|
||||
}
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
|
||||
{
|
||||
if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end())
|
||||
return true;
|
||||
|
||||
m_EnabledExtensions.insert(extName);
|
||||
bformata(extensions, "#extension %s : require\n", extName.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName)
|
||||
{
|
||||
if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end())
|
||||
return true;
|
||||
|
||||
m_EnabledExtensions.insert(extName);
|
||||
bformata(extensions, "#ifdef %s\n", extName.c_str());
|
||||
bformata(extensions, "#extension %s : enable\n", extName.c_str());
|
||||
bcatcstr(extensions, "#endif\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
const ShaderInfo::InOutSignature* psIn = NULL;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
|
||||
if (iIgnoreRedirect == 0)
|
||||
{
|
||||
if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|
||||
||
|
||||
(regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
|
||||
{
|
||||
oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber;
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
|
||||
|
||||
if (psIn && piRebase)
|
||||
*piRebase = psIn->iRebase;
|
||||
|
||||
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
|
||||
std::string res = "";
|
||||
|
||||
bool skipPrefix = false;
|
||||
if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect))
|
||||
{
|
||||
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix)
|
||||
return inputPrefix + res;
|
||||
else
|
||||
return res;
|
||||
}
|
||||
|
||||
ASSERT(psIn != NULL);
|
||||
oss << inputPrefix << (regSpace == 1 ? patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand,
|
||||
int* piStream,
|
||||
uint32_t *puiIgnoreSwizzle,
|
||||
int *piRebase,
|
||||
int iIgnoreRedirect) const
|
||||
{
|
||||
std::ostringstream oss;
|
||||
const ShaderInfo::InOutSignature* psOut = NULL;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
|
||||
if (iIgnoreRedirect == 0)
|
||||
{
|
||||
if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|
||||
|| (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
|
||||
{
|
||||
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber;
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true);
|
||||
|
||||
|
||||
if (psOut && piRebase)
|
||||
*piRebase = psOut->iRebase;
|
||||
|
||||
if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end()))
|
||||
{
|
||||
// Need to route through temp output variable
|
||||
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second;
|
||||
if (!psOperand->m_SubOperands[0].get())
|
||||
{
|
||||
oss << "[" << psOperand->ui32RegisterNumber << "]";
|
||||
}
|
||||
if (piRebase)
|
||||
*piRebase = 0;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
|
||||
std::string res = "";
|
||||
|
||||
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect))
|
||||
{
|
||||
// clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count
|
||||
// with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true
|
||||
// for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases
|
||||
|
||||
if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0))
|
||||
return outputPrefix + res;
|
||||
else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0))
|
||||
return patchPrefix + res;
|
||||
else
|
||||
return res;
|
||||
}
|
||||
ASSERT(psOut != NULL);
|
||||
|
||||
oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count)
|
||||
{
|
||||
char compMask = (char)psOperand->ui32CompMask;
|
||||
int regSpace = psOperand->GetRegisterSpace(this);
|
||||
uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams
|
||||
ASSERT(psShader->ui32CurrentVertexOutputStream < 4);
|
||||
|
||||
// First check for various builtins, mostly depth-output ones.
|
||||
if (psShader->eShaderType == PIXEL_SHADER)
|
||||
{
|
||||
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL ||
|
||||
psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH)
|
||||
{
|
||||
// GL doesn't need declaration, Metal does.
|
||||
return psShader->eTargetLanguage == LANG_METAL;
|
||||
}
|
||||
}
|
||||
|
||||
// Needs declaring if any of the components hasn't been already declared
|
||||
if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0)
|
||||
{
|
||||
int offset;
|
||||
const ShaderInfo::InOutSignature* psSignature = NULL;
|
||||
|
||||
if (psOperand->eSpecialName == NAME_UNDEFINED)
|
||||
{
|
||||
// Need to fetch the actual comp mask
|
||||
if (regSpace == 0)
|
||||
psShader->sInfo.GetOutputSignatureFromRegister(
|
||||
psOperand->ui32RegisterNumber,
|
||||
psOperand->ui32CompMask,
|
||||
psShader->ui32CurrentVertexOutputStream,
|
||||
&psSignature);
|
||||
else
|
||||
psShader->sInfo.GetPatchConstantSignatureFromRegister(
|
||||
psOperand->ui32RegisterNumber,
|
||||
psOperand->ui32CompMask,
|
||||
&psSignature);
|
||||
|
||||
compMask = (char)psSignature->ui32Mask;
|
||||
}
|
||||
for (offset = 0; offset < count; offset++)
|
||||
{
|
||||
psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask;
|
||||
}
|
||||
|
||||
if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL))
|
||||
{
|
||||
// gl_PointSize, doesn't need declaring. TODO: Metal doesn't have pointsize at all?
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::IsVulkan() const
|
||||
{
|
||||
return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
|
||||
}
|
||||
|
||||
bool HLSLCrossCompilerContext::IsSwitch() const
|
||||
{
|
||||
return (flags & HLSLCC_FLAG_NVN_TARGET) != 0;
|
||||
}
|
250
third_party/HLSLcc/src/HLSLcc.cpp
vendored
Normal file
250
third_party/HLSLcc/src/HLSLcc.cpp
vendored
Normal file
@ -0,0 +1,250 @@
|
||||
#include "hlslcc.h"
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/toGLSL.h"
|
||||
#include "internal_includes/toMetal.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/decode.h"
|
||||
|
||||
|
||||
#ifndef GL_VERTEX_SHADER_ARB
|
||||
#define GL_VERTEX_SHADER_ARB 0x8B31
|
||||
#endif
|
||||
#ifndef GL_FRAGMENT_SHADER_ARB
|
||||
#define GL_FRAGMENT_SHADER_ARB 0x8B30
|
||||
#endif
|
||||
#ifndef GL_GEOMETRY_SHADER
|
||||
#define GL_GEOMETRY_SHADER 0x8DD9
|
||||
#endif
|
||||
#ifndef GL_TESS_EVALUATION_SHADER
|
||||
#define GL_TESS_EVALUATION_SHADER 0x8E87
|
||||
#endif
|
||||
#ifndef GL_TESS_CONTROL_SHADER
|
||||
#define GL_TESS_CONTROL_SHADER 0x8E88
|
||||
#endif
|
||||
#ifndef GL_COMPUTE_SHADER
|
||||
#define GL_COMPUTE_SHADER 0x91B9
|
||||
#endif
|
||||
|
||||
static bool CheckConstantBuffersNoDuplicateNames(const std::vector<ConstantBuffer>& buffers, HLSLccReflection& reflectionCallbacks)
|
||||
{
|
||||
uint32_t count = buffers.size();
|
||||
for (uint32_t i = 0; i < count; ++i)
|
||||
{
|
||||
const ConstantBuffer& lhs = buffers[i];
|
||||
for (uint32_t j = i + 1; j < count; ++j)
|
||||
{
|
||||
const ConstantBuffer& rhs = buffers[j];
|
||||
if (lhs.name == rhs.name)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Duplicate constant buffer declaration: " << lhs.name;
|
||||
reflectionCallbacks.OnDiagnostics(oss.str(), 0, true);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
|
||||
unsigned int flags,
|
||||
GLLang language,
|
||||
const GlExtensions *extensions,
|
||||
GLSLCrossDependencyData* dependencies,
|
||||
HLSLccSamplerPrecisionInfo& samplerPrecisions,
|
||||
HLSLccReflection& reflectionCallbacks,
|
||||
GLSLShader* result)
|
||||
{
|
||||
uint32_t* tokens;
|
||||
char* glslcstr = NULL;
|
||||
int GLSLShaderType = GL_FRAGMENT_SHADER_ARB;
|
||||
int success = 0;
|
||||
uint32_t i;
|
||||
|
||||
tokens = (uint32_t*)shader;
|
||||
|
||||
std::auto_ptr<Shader> psShader(DecodeDXBC(tokens, flags));
|
||||
|
||||
if (psShader.get())
|
||||
{
|
||||
Shader* shader = psShader.get();
|
||||
if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks))
|
||||
return 0;
|
||||
|
||||
HLSLCrossCompilerContext sContext(reflectionCallbacks);
|
||||
|
||||
// Add shader precisions from the list
|
||||
psShader->sInfo.AddSamplerPrecisions(samplerPrecisions);
|
||||
|
||||
if (psShader->ui32MajorVersion <= 3)
|
||||
{
|
||||
flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS;
|
||||
#endif
|
||||
|
||||
sContext.psShader = shader;
|
||||
sContext.flags = flags;
|
||||
|
||||
// If dependencies == NULL, we'll create a dummy object for it so that there's always something there.
|
||||
std::auto_ptr<GLSLCrossDependencyData> depPtr(NULL);
|
||||
if (dependencies == NULL)
|
||||
{
|
||||
depPtr.reset(new GLSLCrossDependencyData());
|
||||
sContext.psDependencies = depPtr.get();
|
||||
sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices();
|
||||
}
|
||||
else
|
||||
sContext.psDependencies = dependencies;
|
||||
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
psShader->asPhases[i].hasPostShaderCode = 0;
|
||||
}
|
||||
|
||||
if (language == LANG_METAL)
|
||||
{
|
||||
// Geometry shader is not supported
|
||||
if (psShader->eShaderType == GEOMETRY_SHADER)
|
||||
{
|
||||
result->sourceCode = "";
|
||||
return 0;
|
||||
}
|
||||
ToMetal translator(&sContext);
|
||||
if (!translator.Translate())
|
||||
{
|
||||
bdestroy(sContext.glsl);
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
bdestroy(psShader->asPhases[i].postShaderCode);
|
||||
bdestroy(psShader->asPhases[i].earlyMain);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ToGLSL translator(&sContext);
|
||||
language = translator.SetLanguage(language);
|
||||
translator.SetExtensions(extensions);
|
||||
if (!translator.Translate())
|
||||
{
|
||||
bdestroy(sContext.glsl);
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
bdestroy(psShader->asPhases[i].postShaderCode);
|
||||
bdestroy(psShader->asPhases[i].earlyMain);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch (psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
{
|
||||
GLSLShaderType = GL_VERTEX_SHADER_ARB;
|
||||
break;
|
||||
}
|
||||
case GEOMETRY_SHADER:
|
||||
{
|
||||
GLSLShaderType = GL_GEOMETRY_SHADER;
|
||||
break;
|
||||
}
|
||||
case DOMAIN_SHADER:
|
||||
{
|
||||
GLSLShaderType = GL_TESS_EVALUATION_SHADER;
|
||||
break;
|
||||
}
|
||||
case HULL_SHADER:
|
||||
{
|
||||
GLSLShaderType = GL_TESS_CONTROL_SHADER;
|
||||
break;
|
||||
}
|
||||
case COMPUTE_SHADER:
|
||||
{
|
||||
GLSLShaderType = GL_COMPUTE_SHADER;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
glslcstr = bstr2cstr(sContext.glsl, '\0');
|
||||
result->sourceCode = glslcstr;
|
||||
bcstrfree(glslcstr);
|
||||
|
||||
bdestroy(sContext.glsl);
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
bdestroy(psShader->asPhases[i].postShaderCode);
|
||||
bdestroy(psShader->asPhases[i].earlyMain);
|
||||
}
|
||||
|
||||
result->reflection = psShader->sInfo;
|
||||
|
||||
result->textureSamplers = psShader->textureSamplers;
|
||||
|
||||
success = 1;
|
||||
}
|
||||
|
||||
shader = 0;
|
||||
tokens = 0;
|
||||
|
||||
/* Fill in the result struct */
|
||||
|
||||
result->shaderType = GLSLShaderType;
|
||||
result->GLSLLanguage = language;
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
// Reads a DXBC blob from `filename` and translates it with TranslateHLSLFromMem.
// All parameters other than `filename` are forwarded unchanged.
// Returns 0 if the file cannot be opened, its size cannot be determined, or it is
// too short to hold a single 32-bit token; otherwise returns the translation result.
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
    unsigned int flags,
    GLLang language,
    const GlExtensions *extensions,
    GLSLCrossDependencyData* dependencies,
    HLSLccSamplerPrecisionInfo& samplerPrecisions,
    HLSLccReflection& reflectionCallbacks,
    GLSLShader* result)
{
    FILE* shaderFile = fopen(filename, "rb");
    if (!shaderFile)
    {
        return 0;
    }

    // ftell returns -1 on failure; using that as a size would lead to an empty
    // buffer being indexed and an enormous read request.
    long length = -1;
    if (fseek(shaderFile, 0, SEEK_END) == 0)
        length = ftell(shaderFile);

    if (length < 0 || fseek(shaderFile, 0, SEEK_SET) != 0)
    {
        fclose(shaderFile);
        return 0;
    }

    // One extra byte so the buffer can always be NUL-terminated.
    std::vector<char> shader(static_cast<size_t>(length) + 1);

    const size_t readLength = fread(&shader[0], 1, static_cast<size_t>(length), shaderFile);
    fclose(shaderFile);

    shader[readLength] = '\0';

    // The buffer is consumed as 32-bit tokens, so anything shorter than one
    // token cannot be decoded without reading past the data.
    if (readLength < sizeof(uint32_t))
        return 0;

    return TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result);
}
|
574
third_party/HLSLcc/src/HLSLccToolkit.cpp
vendored
Normal file
574
third_party/HLSLcc/src/HLSLccToolkit.cpp
vendored
Normal file
@ -0,0 +1,574 @@
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/toGLSLOperand.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/languages.h"
|
||||
#include "include/UnityInstancingFlexibleArraySize.h"
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
// Returns the number of bits set in `a`.
// Valid for the full 32-bit range (the previous multiply/modulo trick was only
// correct for values of up to 14 bits).
uint32_t GetNumberBitsSet(uint32_t a)
{
    uint32_t bitsSet = 0;
    // Each iteration clears the lowest set bit, so the loop runs once per set bit.
    for (; a != 0; a &= a - 1)
        ++bitsSet;
    return bitsSet;
}
|
||||
|
||||
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType)
|
||||
{
|
||||
if (eType == SVT_FLOAT16)
|
||||
{
|
||||
return TO_FLAG_FORCE_HALF;
|
||||
}
|
||||
if (eType == SVT_UINT || eType == SVT_UINT16)
|
||||
{
|
||||
return TO_FLAG_UNSIGNED_INTEGER;
|
||||
}
|
||||
else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12)
|
||||
{
|
||||
return TO_FLAG_INTEGER;
|
||||
}
|
||||
else if (eType == SVT_BOOL)
|
||||
{
|
||||
return TO_FLAG_BOOL;
|
||||
}
|
||||
else
|
||||
{
|
||||
return TO_FLAG_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps TO_FLAG_* bits back to a shader variable type.
// Checked in order: half, signed int, unsigned int, bool; anything else is float.
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags)
{
    const uint32_t signedBits = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT;
    const uint32_t unsignedBits = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT;

    SHADER_VARIABLE_TYPE selected = SVT_FLOAT;
    if (typeflags & TO_FLAG_FORCE_HALF)
        selected = SVT_FLOAT16;
    else if (typeflags & signedBits)
        selected = SVT_INT;
    else if (typeflags & unsignedBits)
        selected = SVT_UINT;
    else if (typeflags & TO_FLAG_BOOL)
        selected = SVT_BOOL;
    return selected;
}
|
||||
|
||||
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision)
|
||||
{
|
||||
static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
|
||||
static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
|
||||
static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" };
|
||||
static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" };
|
||||
static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" };
|
||||
static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" };
|
||||
static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" };
|
||||
static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" };
|
||||
static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };
|
||||
|
||||
ASSERT(components >= 1 && components <= 4);
|
||||
bool emitLowp = EmitLowp(context);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case SVT_UINT:
|
||||
return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components];
|
||||
case SVT_UINT16:
|
||||
return useGLSLPrecision ? uint16Types[components] : uintTypes[components];
|
||||
case SVT_INT:
|
||||
return intTypes[components];
|
||||
case SVT_INT16:
|
||||
return useGLSLPrecision ? int16Types[components] : intTypes[components];
|
||||
case SVT_INT12:
|
||||
return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components];
|
||||
case SVT_FLOAT:
|
||||
return floatTypes[components];
|
||||
case SVT_FLOAT16:
|
||||
return useGLSLPrecision ? float16Types[components] : floatTypes[components];
|
||||
case SVT_FLOAT10:
|
||||
return useGLSLPrecision ? (emitLowp ? float10Types[components] : float16Types[components]) : floatTypes[components];
|
||||
case SVT_BOOL:
|
||||
return boolTypes[components];
|
||||
default:
|
||||
ASSERT(0);
|
||||
return " ";
|
||||
}
|
||||
}
|
||||
|
||||
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components)
|
||||
{
|
||||
static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" };
|
||||
static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
|
||||
static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" };
|
||||
static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" };
|
||||
static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" };
|
||||
static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" };
|
||||
static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" };
|
||||
|
||||
ASSERT(components >= 1 && components <= 4);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case SVT_UINT:
|
||||
return uintTypes[components];
|
||||
case SVT_UINT16:
|
||||
case SVT_UINT8: // there is not uint8 in metal so treat it as ushort
|
||||
return ushortTypes[components];
|
||||
case SVT_INT:
|
||||
return intTypes[components];
|
||||
case SVT_INT16:
|
||||
case SVT_INT12:
|
||||
return shortTypes[components];
|
||||
case SVT_FLOAT:
|
||||
return floatTypes[components];
|
||||
case SVT_FLOAT16:
|
||||
case SVT_FLOAT10:
|
||||
return halfTypes[components];
|
||||
case SVT_BOOL:
|
||||
return boolTypes[components];
|
||||
default:
|
||||
ASSERT(0);
|
||||
return " ";
|
||||
}
|
||||
}
|
||||
|
||||
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/)
|
||||
{
|
||||
if (psContext->psShader->eTargetLanguage == LANG_METAL)
|
||||
return GetConstructorForTypeMetal(eType, components);
|
||||
else
|
||||
return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision);
|
||||
}
|
||||
|
||||
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
|
||||
{
|
||||
std::string result;
|
||||
std::ostringstream oss;
|
||||
if (psContext->psShader->eTargetLanguage == LANG_METAL)
|
||||
{
|
||||
switch (eBaseType)
|
||||
{
|
||||
case SVT_FLOAT:
|
||||
oss << "float" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT16:
|
||||
case SVT_FLOAT10:
|
||||
oss << "half" << columns << "x" << rows;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (eBaseType)
|
||||
{
|
||||
case SVT_FLOAT:
|
||||
oss << "mat" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT16:
|
||||
oss << "mediump mat" << columns << "x" << rows;
|
||||
break;
|
||||
case SVT_FLOAT10:
|
||||
oss << "lowp mat" << columns << "x" << rows;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
result = oss.str();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Appends a leading-components swizzle (".x", ".xy", ".xyz") for `count` elements.
// Nothing is appended for count 0 or 4; counts above 4 produce ".xyzw".
void AddSwizzleUsingElementCount(bstring dest, uint32_t count)
{
    if (count == 4 || count == 0)
        return;

    static const char * const kComponents[] = { "x", "y", "z", "w" };

    bcatcstr(dest, ".");
    for (uint32_t component = 0; component < 4 && component < count; ++component)
        bcatcstr(dest, kComponents[component]);
}
|
||||
|
||||
// Calculate the bits set in mask
|
||||
int WriteMaskToComponentCount(uint32_t writeMask)
|
||||
{
|
||||
// In HLSL bytecode writemask 0 also means everything
|
||||
if (writeMask == 0)
|
||||
return 4;
|
||||
|
||||
return (int)GetNumberBitsSet(writeMask);
|
||||
}
|
||||
|
||||
// Builds a mask with the lowest `count` bits set:
// 1 -> 0x1, 2 -> 0x3, 3 -> 0x7 and 4 -> 0xF.
uint32_t BuildComponentMaskFromElementCount(int count)
{
    const int lowestBitsSet = (1 << count) - 1;
    return (uint32_t)lowestBitsSet;
}
|
||||
|
||||
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
|
||||
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src)
|
||||
{
|
||||
if (src == dest)
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) &&
|
||||
(src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16))
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) &&
|
||||
(src == SVT_INT || src == SVT_INT12 || src == SVT_INT16))
|
||||
return true;
|
||||
|
||||
if ((dest == SVT_UINT || dest == SVT_UINT16) &&
|
||||
(src == SVT_UINT || src == SVT_UINT16))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType)
|
||||
{
|
||||
if (eType == RETURN_TYPE_SINT)
|
||||
{
|
||||
return TO_FLAG_INTEGER;
|
||||
}
|
||||
else if (eType == RETURN_TYPE_UINT)
|
||||
{
|
||||
return TO_FLAG_UNSIGNED_INTEGER;
|
||||
}
|
||||
else
|
||||
{
|
||||
return TO_FLAG_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
// Combines a resource return type and a precision into a shader variable type.
// Return types other than SINT/UINT are treated as float; precisions other than
// LOWP/MEDIUMP select the full-precision type.
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec)
{
    // Column 0: full precision, column 1: lowp, column 2: mediump.
    static const SHADER_VARIABLE_TYPE kSignedTypes[] = { SVT_INT, SVT_INT12, SVT_INT16 };
    static const SHADER_VARIABLE_TYPE kUnsignedTypes[] = { SVT_UINT, SVT_UINT8, SVT_UINT16 };
    static const SHADER_VARIABLE_TYPE kFloatTypes[] = { SVT_FLOAT, SVT_FLOAT10, SVT_FLOAT16 };

    int column = 0;
    if (ePrec == REFLECT_RESOURCE_PRECISION_LOWP)
        column = 1;
    else if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
        column = 2;

    if (eType == RETURN_TYPE_SINT)
        return kSignedTypes[column];
    if (eType == RETURN_TYPE_UINT)
        return kUnsignedTypes[column];
    return kFloatTypes[column];
}
|
||||
|
||||
// Maps a shader variable type to the resource return type of its family
// (signed int, unsigned int or float, at any precision).
// Types outside those families map to RETURN_TYPE_UNUSED.
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type)
{
    switch (type)
    {
        case SVT_INT:
        case SVT_INT12:
        case SVT_INT16:
            return RETURN_TYPE_SINT;
        case SVT_UINT:
        case SVT_UINT16:
        // SVT_UINT8 is what ResourceReturnTypeToSVTType yields for lowp unsigned
        // resources, so it has to map back to UINT for the round trip to hold.
        case SVT_UINT8:
            return RETURN_TYPE_UINT;
        case SVT_FLOAT:
        case SVT_FLOAT10:
        case SVT_FLOAT16:
            return RETURN_TYPE_FLOAT;
        default:
            return RETURN_TYPE_UNUSED;
    }
}
|
||||
|
||||
// Returns the precision class (highp / mediump / lowp) implied by a shader variable type,
// or REFLECT_RESOURCE_PRECISION_UNKNOWN for types that carry no precision.
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type)
{
    if (type == SVT_INT || type == SVT_UINT || type == SVT_FLOAT)
        return REFLECT_RESOURCE_PRECISION_HIGHP;

    if (type == SVT_INT16 || type == SVT_UINT16 || type == SVT_FLOAT16)
        return REFLECT_RESOURCE_PRECISION_MEDIUMP;

    if (type == SVT_INT12 || type == SVT_FLOAT10 || type == SVT_UINT8)
        return REFLECT_RESOURCE_PRECISION_LOWP;

    return REFLECT_RESOURCE_PRECISION_UNKNOWN;
}
|
||||
|
||||
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
|
||||
{
|
||||
return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2);
|
||||
}
|
||||
|
||||
// Returns true if the operation is commutative
|
||||
bool IsOperationCommutative(int eOpCode)
|
||||
{
|
||||
switch ((OPCODE_TYPE)eOpCode)
|
||||
{
|
||||
case OPCODE_DADD:
|
||||
case OPCODE_IADD:
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_IMUL:
|
||||
case OPCODE_OR:
|
||||
case OPCODE_AND:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if operands are identical, only cares about temp registers currently.
|
||||
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB)
|
||||
{
|
||||
if (!psA || !psB)
|
||||
return 0;
|
||||
|
||||
if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP)
|
||||
return 0;
|
||||
|
||||
if (psA->eModifier != psB->eModifier)
|
||||
return 0;
|
||||
|
||||
if (psA->iNumComponents != psB->iNumComponents)
|
||||
return 0;
|
||||
|
||||
if (psA->ui32RegisterNumber != psB->ui32RegisterNumber)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode != psB->eSelMode)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask)
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0])
|
||||
return 0;
|
||||
|
||||
if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0]))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool IsAddOneInstruction(const Instruction *psInst)
|
||||
{
|
||||
if (psInst->eOpcode != OPCODE_IADD)
|
||||
return false;
|
||||
if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
|
||||
return false;
|
||||
if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32)
|
||||
return false;
|
||||
|
||||
if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1)
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32)
|
||||
return false;
|
||||
if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
|
||||
return false;
|
||||
|
||||
if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim)
|
||||
{
|
||||
switch ((RESOURCE_DIMENSION)eResDim)
|
||||
{
|
||||
case RESOURCE_DIMENSION_TEXTURE1D:
|
||||
return 1;
|
||||
case RESOURCE_DIMENSION_TEXTURE2D:
|
||||
case RESOURCE_DIMENSION_TEXTURE2DMS:
|
||||
case RESOURCE_DIMENSION_TEXTURE1DARRAY:
|
||||
case RESOURCE_DIMENSION_TEXTURECUBE:
|
||||
return 2;
|
||||
case RESOURCE_DIMENSION_TEXTURE3D:
|
||||
case RESOURCE_DIMENSION_TEXTURE2DARRAY:
|
||||
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
|
||||
case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
|
||||
return 3;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the "more important" type of a and b, currently int < uint < float
|
||||
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b)
|
||||
{
|
||||
#define DO_CHECK(type) if( a == type || b == type ) return type
|
||||
|
||||
// Priority ordering
|
||||
DO_CHECK(SVT_FLOAT16);
|
||||
DO_CHECK(SVT_FLOAT10);
|
||||
DO_CHECK(SVT_UINT16);
|
||||
DO_CHECK(SVT_UINT8);
|
||||
DO_CHECK(SVT_INT16);
|
||||
DO_CHECK(SVT_INT12);
|
||||
DO_CHECK(SVT_FORCED_INT);
|
||||
DO_CHECK(SVT_FLOAT);
|
||||
DO_CHECK(SVT_UINT);
|
||||
DO_CHECK(SVT_INT);
|
||||
DO_CHECK(SVT_INT_AMBIGUOUS);
|
||||
|
||||
#undef DO_CHECK
|
||||
// After these just rely on ordering.
|
||||
return a > b ? a : b;
|
||||
}
|
||||
|
||||
// Returns true if a direct constructor can convert src->dest
|
||||
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
|
||||
{
|
||||
// uint<->int<->bool conversions possible
|
||||
if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) &&
|
||||
(dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16))
|
||||
return true;
|
||||
|
||||
// float<->double possible
|
||||
if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) &&
|
||||
(dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10))
|
||||
return true;
|
||||
|
||||
if (context->psShader->eTargetLanguage == LANG_METAL)
|
||||
{
|
||||
// avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size
|
||||
if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf)
|
||||
{
|
||||
return psCBuf != NULL && psCBuf->asVars.size() == 1
|
||||
&& psCBuf->asVars[0].sType.Class == SVC_STRUCT && psCBuf->asVars[0].sType.Elements == 2
|
||||
&& IsUnityInstancingConstantBufferName(psCBuf->name.c_str());
|
||||
}
|
||||
|
||||
#ifndef fpcheck
|
||||
#ifdef _MSC_VER
|
||||
#define fpcheck(x) (_isnan(x) || !_finite(x))
|
||||
#else
|
||||
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
|
||||
#endif
|
||||
#endif // #ifndef fpcheck
|
||||
|
||||
// Helper function to print floats with full precision
|
||||
void PrintFloat(bstring b, float f)
|
||||
{
|
||||
bstring temp;
|
||||
int ePos;
|
||||
int pointPos;
|
||||
|
||||
temp = bformat("%.9g", f);
|
||||
ePos = bstrchrp(temp, 'e', 0);
|
||||
pointPos = bstrchrp(temp, '.', 0);
|
||||
|
||||
bconcat(b, temp);
|
||||
bdestroy(temp);
|
||||
|
||||
if (ePos < 0 && pointPos < 0 && !fpcheck(f))
|
||||
bcatcstr(b, ".0");
|
||||
}
|
||||
|
||||
// Returns the current phase's earlyMain string after calling AddIndentation() with that string
// temporarily installed as the context's output target (presumably this writes the indentation
// prefix into it - AddIndentation is defined elsewhere).
// The context's output string and indent level are restored before returning.
bstring GetEarlyMain(HLSLCrossCompilerContext *psContext)
{
    bstring *const savedOutput = psContext->currentGLSLString;
    const int savedIndent = psContext->indent;
    bstring *earlyMain = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain;

    // For Metal, never emit at indent level zero.
    if (psContext->psShader->eTargetLanguage == LANG_METAL && psContext->indent == 0)
        psContext->indent++;

    // Redirect output just long enough to add the indentation.
    psContext->currentGLSLString = earlyMain;
    psContext->AddIndentation();

    psContext->currentGLSLString = savedOutput;
    psContext->indent = savedIndent;

    return *earlyMain;
}
|
||||
|
||||
// Returns the current phase's postShaderCode string after calling AddIndentation() with that string
// temporarily installed as the context's output target (presumably this writes the indentation
// prefix into it - AddIndentation is defined elsewhere), and marks the phase as having post-shader code.
// The context's output string and indent level are restored before returning.
bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext)
{
    bstring *const savedOutput = psContext->currentGLSLString;
    const int savedIndent = psContext->indent;
    bstring *postCode = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode;

    // For Metal, never emit at indent level zero.
    if (psContext->psShader->eTargetLanguage == LANG_METAL && psContext->indent == 0)
        psContext->indent++;

    // Remember that this phase has something in its post-shader section.
    psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1;

    // Redirect output just long enough to add the indentation.
    psContext->currentGLSLString = postCode;
    psContext->AddIndentation();

    psContext->currentGLSLString = savedOutput;
    psContext->indent = savedIndent;

    return *postCode;
}
|
||||
}
|
10
third_party/HLSLcc/src/HLSLccTypes.natvis
vendored
Normal file
10
third_party/HLSLcc/src/HLSLccTypes.natvis
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<!-- Debugger display strings for the HLSLcc Instruction and Operand types. -->
|
||||
<!-- Instruction: shows id, opcode and the first two operands. -->
<Type Name="Instruction">
|
||||
<DisplayString>{{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}}</DisplayString>
|
||||
</Type>
|
||||
<!-- Operand: shows operand type and register number. -->
<Type Name="Operand">
|
||||
<DisplayString>{{ type={eType}, reg={ui32RegisterNumber} }}</DisplayString>
|
||||
</Type>
|
||||
|
||||
</AutoVisualizer>
|
349
third_party/HLSLcc/src/Instruction.cpp
vendored
Normal file
349
third_party/HLSLcc/src/Instruction.cpp
vendored
Normal file
@ -0,0 +1,349 @@
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
|
||||
// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles
|
||||
static Operand *GetSrcSwizzleOperand(Instruction *psInst)
|
||||
{
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_NOP:
|
||||
case OPCODE_SWAPC:
|
||||
case OPCODE_SAMPLE_C:
|
||||
case OPCODE_SAMPLE_C_LZ:
|
||||
ASSERT(0);
|
||||
return NULL;
|
||||
|
||||
// Normal arithmetics, all srcs have swizzles
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_AND:
|
||||
case OPCODE_DERIV_RTX:
|
||||
case OPCODE_DERIV_RTX_COARSE:
|
||||
case OPCODE_DERIV_RTX_FINE:
|
||||
case OPCODE_DERIV_RTY:
|
||||
case OPCODE_DERIV_RTY_COARSE:
|
||||
case OPCODE_DERIV_RTY_FINE:
|
||||
case OPCODE_DIV:
|
||||
case OPCODE_EQ:
|
||||
case OPCODE_EXP:
|
||||
case OPCODE_FRC:
|
||||
case OPCODE_FTOI:
|
||||
case OPCODE_FTOU:
|
||||
case OPCODE_GE:
|
||||
case OPCODE_IADD:
|
||||
case OPCODE_IEQ:
|
||||
case OPCODE_IGE:
|
||||
case OPCODE_ILT:
|
||||
case OPCODE_IMAD:
|
||||
case OPCODE_IMAX:
|
||||
case OPCODE_IMIN:
|
||||
case OPCODE_IMUL:
|
||||
case OPCODE_INE:
|
||||
case OPCODE_INEG:
|
||||
case OPCODE_ITOF:
|
||||
case OPCODE_LOG:
|
||||
case OPCODE_LT:
|
||||
case OPCODE_MAD:
|
||||
case OPCODE_MAX:
|
||||
case OPCODE_MIN:
|
||||
case OPCODE_MOV:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_NE:
|
||||
case OPCODE_NOT:
|
||||
case OPCODE_OR:
|
||||
case OPCODE_ROUND_NE:
|
||||
case OPCODE_ROUND_NI:
|
||||
case OPCODE_ROUND_PI:
|
||||
case OPCODE_ROUND_Z:
|
||||
case OPCODE_RSQ:
|
||||
case OPCODE_SINCOS:
|
||||
case OPCODE_SQRT:
|
||||
case OPCODE_UDIV:
|
||||
case OPCODE_UGE:
|
||||
case OPCODE_ULT:
|
||||
case OPCODE_UMAD:
|
||||
case OPCODE_UMAX:
|
||||
case OPCODE_UMIN:
|
||||
case OPCODE_UMUL:
|
||||
case OPCODE_UTOF:
|
||||
case OPCODE_XOR:
|
||||
|
||||
case OPCODE_BFI:
|
||||
case OPCODE_BFREV:
|
||||
case OPCODE_COUNTBITS:
|
||||
case OPCODE_DADD:
|
||||
case OPCODE_DDIV:
|
||||
case OPCODE_DEQ:
|
||||
case OPCODE_DFMA:
|
||||
case OPCODE_DGE:
|
||||
case OPCODE_DLT:
|
||||
case OPCODE_DMAX:
|
||||
case OPCODE_DMIN:
|
||||
case OPCODE_DMUL:
|
||||
case OPCODE_DMOV:
|
||||
case OPCODE_DNE:
|
||||
case OPCODE_DRCP:
|
||||
case OPCODE_DTOF:
|
||||
case OPCODE_F16TOF32:
|
||||
case OPCODE_F32TOF16:
|
||||
case OPCODE_FIRSTBIT_HI:
|
||||
case OPCODE_FIRSTBIT_LO:
|
||||
case OPCODE_FIRSTBIT_SHI:
|
||||
case OPCODE_FTOD:
|
||||
case OPCODE_IBFE:
|
||||
case OPCODE_RCP:
|
||||
case OPCODE_UADDC:
|
||||
case OPCODE_UBFE:
|
||||
case OPCODE_USUBB:
|
||||
case OPCODE_MOVC:
|
||||
case OPCODE_DMOVC:
|
||||
return NULL;
|
||||
|
||||
// Special cases:
|
||||
case OPCODE_GATHER4:
|
||||
case OPCODE_GATHER4_C:
|
||||
case OPCODE_LD:
|
||||
case OPCODE_LD_MS:
|
||||
case OPCODE_LOD:
|
||||
case OPCODE_LD_UAV_TYPED:
|
||||
case OPCODE_LD_RAW:
|
||||
case OPCODE_SAMPLE:
|
||||
case OPCODE_SAMPLE_B:
|
||||
case OPCODE_SAMPLE_L:
|
||||
case OPCODE_SAMPLE_D:
|
||||
case OPCODE_RESINFO:
|
||||
return &psInst->asOperands[2];
|
||||
|
||||
case OPCODE_GATHER4_PO:
|
||||
case OPCODE_GATHER4_PO_C:
|
||||
case OPCODE_LD_STRUCTURED:
|
||||
return &psInst->asOperands[3];
|
||||
|
||||
case OPCODE_SAMPLE_INFO:
|
||||
return &psInst->asOperands[1];
|
||||
|
||||
case OPCODE_ISHL:
|
||||
case OPCODE_ISHR:
|
||||
case OPCODE_USHR:
|
||||
// sm4 variant has single component selection on src1 -> only src0 has swizzle
|
||||
if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
|
||||
return &psInst->asOperands[1];
|
||||
else // whereas sm5 variant has swizzle also on src1
|
||||
return NULL;
|
||||
|
||||
default:
|
||||
ASSERT(0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Tweak the source operands of an instruction so that the rebased write mask will still work
|
||||
static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase)
|
||||
{
|
||||
uint32_t i;
|
||||
switch (psOperand->eSelMode)
|
||||
{
|
||||
default:
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL);
|
||||
|
||||
// Special case for immediates, they do not have swizzles
|
||||
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
|
||||
{
|
||||
if (psOperand->iNumComponents > 1)
|
||||
std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]);
|
||||
return;
|
||||
}
|
||||
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
|
||||
{
|
||||
if (psOperand->iNumComponents > 1)
|
||||
std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
// Need to change this to swizzle
|
||||
psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE;
|
||||
psOperand->ui32Swizzle = 0;
|
||||
for (i = 0; i < 4 - rebase; i++)
|
||||
psOperand->aui32Swizzle[i] = i + rebase;
|
||||
for (; i < 4; i++)
|
||||
psOperand->aui32Swizzle[i] = rebase; // The first actual input.
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
// Nothing to do
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
for (i = rebase; i < 4; i++)
|
||||
psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Renames temp register oldReg to newReg in psOperand (and, when requested via flags, in its sub-operands),
// but only if the operand accesses nothing outside compMask.
// With a non-zero rebase the operand's component selection and per-component data types are shifted down
// by `rebase`; if the operand is one of this instruction's destinations, the sources are adjusted to match.
void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase)
{
    // Sub-operands first; they are always processed with UD_CHANGE_ALL.
    if (flags & UD_CHANGE_SUBOPERANDS)
    {
        for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
        {
            Operand *psSubOperand = psOperand->m_SubOperands[sub].get();
            if (psSubOperand)
                ChangeOperandTempRegister(psSubOperand, oldReg, newReg, compMask, UD_CHANGE_ALL, rebase);
        }
    }

    // Nothing more to do unless the main operand is to be changed and it is the temp we are after.
    if ((flags & UD_CHANGE_MAIN_OPERAND) == 0)
        return;
    if (psOperand->eType != OPERAND_TYPE_TEMP || psOperand->ui32RegisterNumber != oldReg)
        return;

    // If this operation touches other components than the one(s) we're splitting, skip it
    const uint32_t accessMask = psOperand->GetAccessMask();
    if ((accessMask & (~compMask)) != 0)
    {
        // Verify that we've not messed up in reachability analysis.
        // This would mean that we've encountered an instruction that accesses
        // a component in multi-component mode and we're supposed to treat it as single-use only.
        // Now that we track operands we can bring this back
        ASSERT((accessMask & compMask) == 0);
        return;
    }

#if 0
    printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask);
#endif
    psOperand->ui32RegisterNumber = newReg;

    if (rebase == 0)
        return;

    // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
    switch (psOperand->eSelMode)
    {
        case OPERAND_4_COMPONENT_MASK_MODE:
        {
            // An empty mask stands for "all components".
            const uint32_t oldMask = (psOperand->ui32CompMask == 0) ? OPERAND_4_COMPONENT_MASK_ALL : psOperand->ui32CompMask;

            // Check that we're not losing any information
            ASSERT((oldMask >> rebase) << rebase == oldMask);
            psOperand->ui32CompMask = (oldMask >> rebase);
            break;
        }
        case OPERAND_4_COMPONENT_SELECT_1_MODE:
            ASSERT(psOperand->aui32Swizzle[0] >= rebase);
            psOperand->aui32Swizzle[0] -= rebase;
            break;
        case OPERAND_4_COMPONENT_SWIZZLE_MODE:
        {
            // Note that this rebase is different from the one done for source operands
            for (uint32_t comp = 0; comp < 4; comp++)
            {
                ASSERT(psOperand->aui32Swizzle[comp] >= rebase);
                psOperand->aui32Swizzle[comp] -= rebase;
            }
            break;
        }
        default:
            ASSERT(0);
    }

    // Tweak operand datatypes
    std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]);

    // If this operand is a destination, we'll need to tweak sources as well
    bool isDestination = false;
    for (uint32_t dst = 0; dst < ui32FirstSrc && !isDestination; dst++)
        isDestination = (psOperand == &asOperands[dst]);

    if (!isDestination)
        return;

    // Nasty corner case of 2 destinations, not supported if both targets are written
    ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL));

    // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction
    switch (eOpcode)
    {
        // The opcodes that do not need tweaking:
        case OPCODE_DP2:
        case OPCODE_DP3:
        case OPCODE_DP4:
        case OPCODE_BUFINFO:
        case OPCODE_SAMPLE_C:
        case OPCODE_SAMPLE_C_LZ:
            return;

        default:
            break;
    }

    // A single swizzle-carrying source gets rebased on its own; NULL means tweak all source operands.
    Operand *psSwizzleOperand = GetSrcSwizzleOperand(this);
    if (psSwizzleOperand)
    {
        DoSrcOperandRebase(psSwizzleOperand, rebase);
        return;
    }

    for (uint32_t src = ui32FirstSrc; src < ui32NumOperands; src++)
        DoSrcOperandRebase(&asOperands[src], rebase);
}
|
||||
|
||||
// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision
|
||||
bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const
|
||||
{
|
||||
const Operand *op;
|
||||
const ResourceBinding *psBinding = NULL;
|
||||
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
|
||||
switch (eOpcode)
|
||||
{
|
||||
default:
|
||||
return false;
|
||||
case OPCODE_SAMPLE:
|
||||
case OPCODE_SAMPLE_B:
|
||||
case OPCODE_SAMPLE_L:
|
||||
case OPCODE_SAMPLE_D:
|
||||
case OPCODE_SAMPLE_C:
|
||||
case OPCODE_SAMPLE_C_LZ:
|
||||
break;
|
||||
}
|
||||
|
||||
op = &asOperands[3];
|
||||
ASSERT(op->eType == OPERAND_TYPE_SAMPLER);
|
||||
|
||||
info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding);
|
||||
if (!psBinding)
|
||||
{
|
||||
/* Try to look from texture group */
|
||||
info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding);
|
||||
}
|
||||
|
||||
sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
|
||||
|
||||
if (sType == OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return false;
|
||||
|
||||
if (pType)
|
||||
*pType = sType;
|
||||
|
||||
return true;
|
||||
}
|
370
third_party/HLSLcc/src/LoopTransform.cpp
vendored
Normal file
370
third_party/HLSLcc/src/LoopTransform.cpp
vendored
Normal file
@ -0,0 +1,370 @@
|
||||
#include "src/internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "src/internal_includes/LoopTransform.h"
|
||||
#include "src/internal_includes/Shader.h"
|
||||
#include "src/internal_includes/debug.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
struct LoopInfo
|
||||
{
|
||||
public:
|
||||
LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {}
|
||||
|
||||
Instruction * m_StartLoop; // OPCODE_LOOP
|
||||
Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above.
|
||||
std::vector<Instruction *> m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth
|
||||
bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing.
|
||||
};
|
||||
|
||||
typedef std::list<LoopInfo> Loops;
|
||||
|
||||
// Build a loopinfo array of all the loops in this shader phase
|
||||
void BuildLoopInfo(ShaderPhase &phase, Loops &res)
|
||||
{
|
||||
using namespace std;
|
||||
res.clear();
|
||||
|
||||
// A stack of loopinfo elements (stored in res)
|
||||
list<LoopInfo *> loopStack;
|
||||
|
||||
// Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here.
|
||||
list<LoopInfo> dummyLIForSwitches;
|
||||
|
||||
for (std::vector<Instruction>::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++)
|
||||
{
|
||||
Instruction *i = &*instItr;
|
||||
|
||||
if (i->eOpcode == OPCODE_LOOP)
|
||||
{
|
||||
LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo());
|
||||
currLoopInfo->m_StartLoop = i;
|
||||
loopStack.push_front(currLoopInfo);
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_ENDLOOP)
|
||||
{
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
loopStack.pop_front();
|
||||
li->m_EndLoop = i;
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_SWITCH)
|
||||
{
|
||||
// Create a dummy entry into the stack
|
||||
LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo());
|
||||
li->m_IsSwitch = true;
|
||||
loopStack.push_front(li);
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_ENDSWITCH)
|
||||
{
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
loopStack.pop_front();
|
||||
ASSERT(li->m_IsSwitch);
|
||||
}
|
||||
else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC)
|
||||
{
|
||||
// Get the current loopstack head
|
||||
ASSERT(!loopStack.empty());
|
||||
LoopInfo *li = *loopStack.begin();
|
||||
// Ignore breaks from switch-cases
|
||||
if (!li->m_IsSwitch)
|
||||
{
|
||||
li->m_ExitPoints.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp
|
||||
static bool IsScalarTempComparisonInstruction(const Instruction *i)
|
||||
{
|
||||
switch (i->eOpcode)
|
||||
{
|
||||
default:
|
||||
return false;
|
||||
case OPCODE_IGE:
|
||||
case OPCODE_ILT:
|
||||
case OPCODE_IEQ:
|
||||
case OPCODE_INE:
|
||||
case OPCODE_UGE:
|
||||
case OPCODE_ULT:
|
||||
break;
|
||||
}
|
||||
|
||||
if (i->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
|
||||
int tempOp = -1;
|
||||
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP)
|
||||
tempOp = 1;
|
||||
else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP)
|
||||
tempOp = 2;
|
||||
|
||||
// Also reject comparisons where we compare temp.x vs temp.y
|
||||
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber)
|
||||
return false;
|
||||
|
||||
if (tempOp == -1)
|
||||
return false;
|
||||
|
||||
if (i->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX <op> imm32
|
||||
static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b)
|
||||
{
|
||||
if (a->eOpcode != b->eOpcode)
|
||||
return false;
|
||||
ASSERT(a->ui32NumOperands == b->ui32NumOperands);
|
||||
uint32_t dstReg = 0;
|
||||
if (a->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return false;
|
||||
dstReg = a->asOperands[0].ui32RegisterNumber;
|
||||
|
||||
for (uint32_t i = 0; i < a->ui32NumOperands; i++)
|
||||
{
|
||||
const Operand &aop = a->asOperands[i];
|
||||
const Operand &bop = b->asOperands[i];
|
||||
if (aop.eType != bop.eType)
|
||||
return false;
|
||||
|
||||
if (aop.GetAccessMask() != bop.GetAccessMask())
|
||||
return false;
|
||||
|
||||
if (aop.GetNumSwizzleElements() != 1)
|
||||
return false;
|
||||
|
||||
if (aop.eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (aop.ui32RegisterNumber != bop.ui32RegisterNumber)
|
||||
return false;
|
||||
if (aop.ui32RegisterNumber != dstReg)
|
||||
return false;
|
||||
}
|
||||
else if (aop.eType == OPERAND_TYPE_IMMEDIATE32)
|
||||
{
|
||||
if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempt to transform a single loop into a for-statement
|
||||
static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li)
|
||||
{
|
||||
// In order to transform a loop into a for, the following has to hold:
|
||||
// - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
|
||||
// - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above
|
||||
// Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement.
|
||||
// Also, the loop induction variable must be standalone (as in, never used as part of a larger vector)
|
||||
|
||||
Instruction *cmpInst = li.m_StartLoop + 1;
|
||||
|
||||
if (!IsScalarTempComparisonInstruction(cmpInst))
|
||||
return;
|
||||
|
||||
Instruction *breakInst = li.m_StartLoop + 2;
|
||||
if (breakInst->eOpcode != OPCODE_BREAKC)
|
||||
return;
|
||||
if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber)
|
||||
return;
|
||||
|
||||
// Check that the comparison result isn't used anywhere else
|
||||
if (cmpInst->m_Uses.size() != 1)
|
||||
return;
|
||||
|
||||
ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst);
|
||||
|
||||
// Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable
|
||||
uint32_t inductionVarIdx = 0;
|
||||
|
||||
Instruction *lastInst = li.m_EndLoop - 1;
|
||||
if (lastInst->eOpcode != OPCODE_IADD)
|
||||
return;
|
||||
if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
if (lastInst->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
return;
|
||||
|
||||
uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber;
|
||||
// Verify that the induction variable actually matches.
|
||||
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar)
|
||||
inductionVarIdx = 1;
|
||||
else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar)
|
||||
inductionVarIdx = 2;
|
||||
else
|
||||
return;
|
||||
|
||||
// Verify that we also read from the induction variable in the last instruction
|
||||
if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) ||
|
||||
(lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar)))
|
||||
return;
|
||||
|
||||
// Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops,
|
||||
// but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
|
||||
// This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
|
||||
// So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
|
||||
if (psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan())
|
||||
{
|
||||
for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
|
||||
{
|
||||
switch (itr->eOpcode)
|
||||
{
|
||||
case OPCODE_LD_RAW:
|
||||
case OPCODE_LD_STRUCTURED:
|
||||
case OPCODE_LD_UAV_TYPED:
|
||||
case OPCODE_STORE_RAW:
|
||||
case OPCODE_STORE_STRUCTURED:
|
||||
case OPCODE_STORE_UAV_TYPED:
|
||||
return; // Nope, can't do a for, not even a partial one.
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst.
|
||||
// Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called.
|
||||
// Of course, if all those instructions are identical, then it's fine.
|
||||
// Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well.
|
||||
|
||||
Instruction *initializer = NULL;
|
||||
std::vector<const Operand::Define *> definitionsOutsideRange;
|
||||
std::vector<const Operand::Define *> definitionsInsideRange;
|
||||
std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def)
|
||||
{
|
||||
if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
|
||||
definitionsOutsideRange.push_back(&def);
|
||||
else
|
||||
definitionsInsideRange.push_back(&def);
|
||||
});
|
||||
|
||||
if (definitionsInsideRange.size() != 1)
|
||||
{
|
||||
// All definitions must be identical
|
||||
for (std::vector<const Operand::Define*>::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++)
|
||||
{
|
||||
if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(definitionsOutsideRange.size() > 0);
|
||||
if (definitionsOutsideRange.size() == 1)
|
||||
initializer = definitionsOutsideRange[0]->m_Inst;
|
||||
|
||||
// Initializer must only write to one component
|
||||
if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
|
||||
initializer = 0;
|
||||
// Initializer data type must be int or uint
|
||||
if (initializer)
|
||||
{
|
||||
SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext);
|
||||
if (dataType != SVT_INT && dataType != SVT_UINT)
|
||||
return;
|
||||
}
|
||||
|
||||
// Check that the initializer is only used within the range so we can move it to for statement
|
||||
if (initializer)
|
||||
{
|
||||
bool hasUsesOutsideRange = false;
|
||||
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u)
|
||||
{
|
||||
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
|
||||
hasUsesOutsideRange = true;
|
||||
});
|
||||
// Has outside uses? we cannot pull that up to the for statement
|
||||
if (hasUsesOutsideRange)
|
||||
initializer = 0;
|
||||
}
|
||||
|
||||
// Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
|
||||
if (initializer)
|
||||
{
|
||||
bool cannotDoInitializer = false;
|
||||
for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
|
||||
{
|
||||
const Instruction::Use &u = *itr;
|
||||
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
|
||||
{
|
||||
cannotDoInitializer = true;
|
||||
break;
|
||||
}
|
||||
// Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
|
||||
if (u.m_Op->GetAccessMask() != 1)
|
||||
{
|
||||
cannotDoInitializer = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Has outside uses? we cannot pull that up to the for statement
|
||||
if (cannotDoInitializer)
|
||||
initializer = 0;
|
||||
}
|
||||
|
||||
|
||||
if (initializer)
|
||||
{
|
||||
// We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
|
||||
uint32_t newRegister = phase.m_NextFreeTempRegister++;
|
||||
li.m_StartLoop->m_InductorRegister = newRegister;
|
||||
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u)
|
||||
{
|
||||
u.m_Op->m_ForLoopInductorName = newRegister;
|
||||
});
|
||||
// Also tweak the destinations for cmpInst, and lastInst
|
||||
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
|
||||
cmpInst->asOperands[1].m_ForLoopInductorName = newRegister;
|
||||
else
|
||||
cmpInst->asOperands[2].m_ForLoopInductorName = newRegister;
|
||||
|
||||
if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
|
||||
lastInst->asOperands[1].m_ForLoopInductorName = newRegister;
|
||||
else
|
||||
lastInst->asOperands[2].m_ForLoopInductorName = newRegister;
|
||||
|
||||
lastInst->asOperands[0].m_ForLoopInductorName = newRegister;
|
||||
initializer->asOperands[0].m_ForLoopInductorName = newRegister;
|
||||
}
|
||||
|
||||
// This loop can be transformed to for-loop. Do the necessary magicks.
|
||||
li.m_StartLoop->m_LoopInductors[0] = initializer;
|
||||
li.m_StartLoop->m_LoopInductors[1] = cmpInst;
|
||||
li.m_StartLoop->m_LoopInductors[2] = breakInst;
|
||||
li.m_StartLoop->m_LoopInductors[3] = lastInst;
|
||||
|
||||
if (initializer)
|
||||
initializer->m_SkipTranslation = true;
|
||||
cmpInst->m_SkipTranslation = true;
|
||||
breakInst->m_SkipTranslation = true;
|
||||
lastInst->m_SkipTranslation = true;
|
||||
}
|
||||
|
||||
// Finds all loops of the phase and tries to turn each of them into a for-statement.
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase)
{
    Loops phaseLoops;
    BuildLoopInfo(phase, phaseLoops);

    for (LoopInfo &loop : phaseLoops)
    {
        // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point
        // Also that there's at least 2 instructions in loop body
        ASSERT(loop.m_StartLoop != 0);
        ASSERT(loop.m_EndLoop != 0);
        ASSERT(loop.m_EndLoop > loop.m_StartLoop + 2);
        ASSERT(!loop.m_IsSwitch);
        ASSERT(!loop.m_ExitPoints.empty());
        AttemptLoopTransform(psContext, phase, loop);
    }
}
|
||||
}
|
641
third_party/HLSLcc/src/Operand.cpp
vendored
Normal file
641
third_party/HLSLcc/src/Operand.cpp
vendored
Normal file
@ -0,0 +1,641 @@
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
|
||||
uint32_t Operand::GetAccessMask() const
|
||||
{
|
||||
int i;
|
||||
uint32_t accessMask = 0;
|
||||
// NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now.
|
||||
switch (eSelMode)
|
||||
{
|
||||
default:
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
// Update access mask
|
||||
accessMask = ui32CompMask;
|
||||
if (accessMask == 0)
|
||||
accessMask = OPERAND_4_COMPONENT_MASK_ALL;
|
||||
break;
|
||||
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
accessMask = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
accessMask |= 1 << (aui32Swizzle[i]);
|
||||
break;
|
||||
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
accessMask = 1 << (aui32Swizzle[0]);
|
||||
break;
|
||||
}
|
||||
ASSERT(accessMask != 0);
|
||||
return accessMask;
|
||||
}
|
||||
|
||||
int Operand::GetMaxComponent() const
|
||||
{
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents == 4)
|
||||
{
|
||||
//Component Mask
|
||||
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
|
||||
{
|
||||
if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W))
|
||||
{
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W)
|
||||
{
|
||||
return 4;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
//Component Swizzle
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
if (ui32Swizzle == NO_SWIZZLE)
|
||||
return 4;
|
||||
|
||||
uint32_t res = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
res = std::max(aui32Swizzle[i], res);
|
||||
}
|
||||
return (int)res + 1;
|
||||
}
|
||||
else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 4;
|
||||
}
|
||||
|
||||
//Single component repeated
|
||||
//e..g .wwww
|
||||
bool Operand::IsSwizzleReplicated() const
|
||||
{
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents == 4)
|
||||
{
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
if (ui32Swizzle == WWWW_SWIZZLE ||
|
||||
ui32Swizzle == ZZZZ_SWIZZLE ||
|
||||
ui32Swizzle == YYYY_SWIZZLE ||
|
||||
ui32Swizzle == XXXX_SWIZZLE)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the number of elements returned by operand, taking additional component mask into account
|
||||
uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const
|
||||
{
|
||||
uint32_t count = 0;
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID:
|
||||
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
|
||||
// Adjust component count and break to more processing
|
||||
((Operand *)this)->iNumComponents = 3;
|
||||
break;
|
||||
case OPERAND_TYPE_IMMEDIATE32:
|
||||
case OPERAND_TYPE_IMMEDIATE64:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
|
||||
case OPERAND_TYPE_OUTPUT_DEPTH:
|
||||
{
|
||||
// Translate numComponents into bitmask
|
||||
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
|
||||
uint32_t compMask = (1 << iNumComponents) - 1;
|
||||
|
||||
compMask &= _ui32CompMask;
|
||||
// Calculate bits left in compMask
|
||||
return HLSLcc::GetNumberBitsSet(compMask);
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (iWriteMaskEnabled &&
|
||||
iNumComponents != 1)
|
||||
{
|
||||
//Component Mask
|
||||
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
|
||||
{
|
||||
uint32_t compMask = ui32CompMask;
|
||||
if (compMask == 0)
|
||||
compMask = OPERAND_4_COMPONENT_MASK_ALL;
|
||||
compMask &= _ui32CompMask;
|
||||
|
||||
if (compMask == OPERAND_4_COMPONENT_MASK_ALL)
|
||||
return 4;
|
||||
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_X)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_Y)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_Z)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (compMask & OPERAND_4_COMPONENT_MASK_W)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
else
|
||||
//Component Swizzle
|
||||
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
if ((_ui32CompMask & (1 << i)) == 0)
|
||||
continue;
|
||||
|
||||
count++;
|
||||
}
|
||||
}
|
||||
else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
|
||||
{
|
||||
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
//Component Select 1
|
||||
}
|
||||
|
||||
if (!count)
|
||||
{
|
||||
// Translate numComponents into bitmask
|
||||
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
|
||||
uint32_t compMask = (1 << iNumComponents) - 1;
|
||||
|
||||
compMask &= _ui32CompMask;
|
||||
// Calculate bits left in compMask
|
||||
return HLSLcc::GetNumberBitsSet(compMask);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
|
||||
int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const
|
||||
{
|
||||
if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER)
|
||||
return 0;
|
||||
|
||||
if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE)
|
||||
return 0;
|
||||
|
||||
if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT)
|
||||
return 0;
|
||||
|
||||
if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
|
||||
{
|
||||
return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase);
|
||||
}
|
||||
|
||||
// Determines the shader variable type this operand should be treated as.
//
// psContext                   - translation context; supplies the shader's signatures, constant
//                               buffers, current phase and target language.
// ePreferredTypeForImmediates - type reported for 32-bit immediate operands.
//
// Resolution order: indexable temps are always float; an explicit min-precision qualifier on the
// operand comes next; only then is the operand type examined. Anything that cannot be resolved
// (including a failed signature lookup) falls back to SVT_FLOAT.
SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
{
    // indexable temps (temp arrays) are always float
    if (eType == OPERAND_TYPE_INDEXABLE_TEMP)
        return SVT_FLOAT;

    // The min precision qualifier overrides all of the stuff below
    switch (eMinPrecision)
    {
        case OPERAND_MIN_PRECISION_FLOAT_16:
            return SVT_FLOAT16;
        case OPERAND_MIN_PRECISION_FLOAT_2_8:
            return SVT_FLOAT10;
        case OPERAND_MIN_PRECISION_SINT_16:
            return SVT_INT16;
        case OPERAND_MIN_PRECISION_UINT_16:
            return SVT_UINT16;
        default:
            break;
    }

    switch (eType)
    {
        case OPERAND_TYPE_TEMP:
        {
            SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT;
            int i = 0;

            if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
            {
                return aeDataType[aui32Swizzle[0]];
            }
            if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
            {
                if (ui32Swizzle == (NO_SWIZZLE))
                {
                    return aeDataType[0];
                }

                return aeDataType[aui32Swizzle[0]];
            }

            if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
            {
                uint32_t mask = ui32CompMask;
                if (!mask)
                {
                    mask = OPERAND_4_COMPONENT_MASK_ALL;
                }

                // The type of the first masked component is the answer.
                for (; i < 4; ++i)
                {
                    if (mask & (1 << i))
                    {
                        eCurrentType = aeDataType[i];
                        break;
                    }
                }

#ifdef _DEBUG
                // Check if all elements have the same basic type.
                for (; i < 4; ++i)
                {
                    if (mask & (1 << i))
                    {
                        if (eCurrentType != aeDataType[i])
                        {
                            ASSERT(0);
                        }
                    }
                }
#endif
                return eCurrentType;
            }

            ASSERT(0);

            break;
        }
        case OPERAND_TYPE_OUTPUT:
        {
            const uint32_t ui32Register = ui32RegisterNumber;
            int regSpace = GetRegisterSpace(psContext);
            const ShaderInfo::InOutSignature* psOut = NULL;

            if (regSpace == 0)
                psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
                    &psOut);
            else
            {
                psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true);
                if (!psOut)
                    return SVT_FLOAT;
            }

            ASSERT(psOut != NULL);
            // ASSERT may be compiled out; never dereference a signature that was not found.
            // Use the same float fallback as the patch constant path above.
            if (psOut == NULL)
                return SVT_FLOAT;

            if (psOut->eMinPrec != MIN_PRECISION_DEFAULT)
            {
                switch (psOut->eMinPrec)
                {
                    default:
                        ASSERT(0);
                        break;
                    case MIN_PRECISION_FLOAT_16:
                        return SVT_FLOAT16;
                    case MIN_PRECISION_FLOAT_2_8:
                        if (psContext->psShader->eTargetLanguage == LANG_METAL)
                            return SVT_FLOAT16;
                        else
                            return SVT_FLOAT10;
                    case MIN_PRECISION_SINT_16:
                        return SVT_INT16;
                    case MIN_PRECISION_UINT_16:
                        return SVT_UINT16;
                }
            }
            if (psOut->eComponentType == INOUT_COMPONENT_UINT32)
            {
                return SVT_UINT;
            }
            else if (psOut->eComponentType == INOUT_COMPONENT_SINT32)
            {
                return SVT_INT;
            }
            return SVT_FLOAT;
        }
        case OPERAND_TYPE_INPUT:
        case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
        case OPERAND_TYPE_INPUT_CONTROL_POINT:
        {
            // The register number is read from the last index dimension.
            const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1];
            int regSpace = GetRegisterSpace(psContext);
            const ShaderInfo::InOutSignature* psIn = NULL;

            if (regSpace == 0)
            {
                if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0)
                    return SVT_FLOAT; // All combined inputs are stored as floats
                psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(),
                    &psIn);
            }
            else
            {
                if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0)
                    return SVT_FLOAT; // All combined inputs are stored as floats
                psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn);
            }

            ASSERT(psIn != NULL);

            switch (eSpecialName)
            {
                //UINT in DX, INT in GL.
                case NAME_PRIMITIVE_ID:
                case NAME_VERTEX_ID:
                case NAME_INSTANCE_ID:
                case NAME_RENDER_TARGET_ARRAY_INDEX:
                case NAME_VIEWPORT_ARRAY_INDEX:
                case NAME_SAMPLE_INDEX:
                    return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT;

                case NAME_IS_FRONT_FACE:
                    return SVT_UINT;

                case NAME_POSITION:
                case NAME_CLIP_DISTANCE:
                case NAME_CULL_DISTANCE:
                    return SVT_FLOAT;

                default:
                    // Not decided by the operand's special name; consult the signature below.
                    break;
            }

            // ASSERT may be compiled out; everything below reads the signature, so fall back to
            // float when the lookup did not find one.
            if (psIn == NULL)
                return SVT_FLOAT;

            if (psIn->eSystemValueType == NAME_IS_FRONT_FACE)
                return SVT_UINT;

            //UINT in DX, INT in GL.
            if (psIn->eSystemValueType == NAME_PRIMITIVE_ID ||
                psIn->eSystemValueType == NAME_VERTEX_ID ||
                psIn->eSystemValueType == NAME_INSTANCE_ID ||
                psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX ||
                psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX ||
                psIn->eSystemValueType == NAME_SAMPLE_INDEX)
                return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT;

            if (psIn->eMinPrec != MIN_PRECISION_DEFAULT)
            {
                switch (psIn->eMinPrec)
                {
                    default:
                        ASSERT(0);
                        break;
                    case MIN_PRECISION_FLOAT_16:
                        return SVT_FLOAT16;
                    case MIN_PRECISION_FLOAT_2_8:
                        if (psContext->psShader->eTargetLanguage == LANG_METAL)
                            return SVT_FLOAT16;
                        else
                            return SVT_FLOAT10;
                    case MIN_PRECISION_SINT_16:
                        return SVT_INT16;
                    case MIN_PRECISION_UINT_16:
                        return SVT_UINT16;
                }
            }

            if (psIn->eComponentType == INOUT_COMPONENT_UINT32)
            {
                return SVT_UINT;
            }
            else if (psIn->eComponentType == INOUT_COMPONENT_SINT32)
            {
                return SVT_INT;
            }
            return SVT_FLOAT;
        }
        case OPERAND_TYPE_CONSTANT_BUFFER:
        {
            const ConstantBuffer* psCBuf = NULL;
            const ShaderVarType* psVarType = NULL;
            int32_t rebase = -1;
            bool isArray;
            psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
            if (psCBuf)
            {
                int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
                if (foundVar)
                    return psVarType->Type;

                ASSERT(0);
            }
            else
                ASSERT(0);
            // Unresolved constant buffer access ends up at the float fallback after the switch.
            break;
        }
        case OPERAND_TYPE_IMMEDIATE32:
        {
            return ePreferredTypeForImmediates;
        }

        case OPERAND_TYPE_IMMEDIATE64:
        {
            return SVT_DOUBLE;
        }

        case OPERAND_TYPE_INPUT_THREAD_ID:
        case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
        case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
        case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
        {
            return SVT_UINT;
        }
        case OPERAND_TYPE_SPECIAL_ADDRESS:
        case OPERAND_TYPE_SPECIAL_LOOPCOUNTER:
        case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
        case OPERAND_TYPE_INPUT_PRIMITIVEID:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
        {
            return SVT_UINT;
        }
        case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
        {
            return SVT_INT;
        }
        case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan
        {
            return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT;
        }

        default:
        {
            return SVT_FLOAT;
        }
    }

    return SVT_FLOAT;
}
|
||||
|
||||
// Maps a reflected resource precision onto the operand min-precision enum:
// mediump -> FLOAT_16, highp -> DEFAULT, and everything else (lowp, unknown, unrecognized) -> FLOAT_2_8.
OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec)
{
    if (ePrec == REFLECT_RESOURCE_PRECISION_MEDIUMP)
        return OPERAND_MIN_PRECISION_FLOAT_16;

    if (ePrec == REFLECT_RESOURCE_PRECISION_HIGHP)
        return OPERAND_MIN_PRECISION_DEFAULT;

    // REFLECT_RESOURCE_PRECISION_UNKNOWN, REFLECT_RESOURCE_PRECISION_LOWP and any other value
    return OPERAND_MIN_PRECISION_FLOAT_2_8;
}
|
||||
|
||||
int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const
|
||||
{
|
||||
const ShaderInfo::InOutSignature *psSig = NULL;
|
||||
int regSpace = GetRegisterSpace(psContext);
|
||||
|
||||
switch (eType)
|
||||
{
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
|
||||
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
|
||||
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
|
||||
return 1;
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
|
||||
case OPERAND_TYPE_INPUT_THREAD_ID:
|
||||
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
|
||||
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
|
||||
return 3;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (regSpace == 0)
|
||||
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
|
||||
else
|
||||
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
|
||||
|
||||
ASSERT(psSig != NULL);
|
||||
|
||||
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
|
||||
}
|
||||
|
||||
// Returns the sub-operand used to dynamically index this operand (the first sub-operand, or the
// second one when the first is absent); NULL when neither is present.
//
// psContext            - translation context; used for type queries and for reporting diagnostics.
// psVar                - variable being indexed; the size of its parent decides whether any address
//                        calculation was needed at all.
// isAoS                - true when indexing an array of structs; only then is the original index
//                        variable searched for.
// needsIndexCalcRevert - out: set to true when the returned operand still holds the computed
//                        float4 address, so the caller has to revert the calculation.
//
// When the addressing scheme is not recognized, an "Unsupported dynamic indexing scheme"
// diagnostic is raised through psContext->m_Reflection.
Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const
{
    Operand *psDynIndexOp = m_SubOperands[0].get();
    if (psDynIndexOp == NULL)
        psDynIndexOp = m_SubOperands[1].get();

    *needsIndexCalcRevert = false;
    if (psDynIndexOp != NULL && isAoS)
    {
        // if dynamically indexing array of structs, try using the original index var before the float4 address calc
        bool indexVarFound = false;
        *needsIndexCalcRevert = true;

        // The index operand may have no recorded defining instruction; reading m_Defines[0]
        // unconditionally would index an empty container. Treat that as "origin unknown".
        Instruction *psDynIndexOrigin = NULL;
        if (!psDynIndexOp->m_Defines.empty())
            psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst;

        Operand *psOriginOp = NULL;

        // DXBC always addresses as float4, find the address calculation

        // Special case where struct is float4 size, no extra calc is done
        if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway
        {
            indexVarFound = true;
            *needsIndexCalcRevert = false;
        }
        else if (psDynIndexOrigin != NULL)
        {
            Operand *asOps = psDynIndexOrigin->asOperands;

            if (psDynIndexOrigin->eOpcode == OPCODE_IMUL)
            {
                // check which one of the src operands is the original index
                if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
                    psOriginOp = &asOps[2];
                else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
                    psOriginOp = &asOps[3];
            }
            else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL)
            {
                if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
                    psOriginOp = &asOps[0];
                else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
                    psOriginOp = &asOps[1];
            }
        }

        if (psOriginOp != NULL)
        {
            indexVarFound = true;

            // Check if the mul dest is not the same temp as the src. Also check that the temp
            // does not have multiple uses (which could override the value)
            // -> we can use src straight and no index revert calc is needed
            if ((psOriginOp->eType == OPERAND_TYPE_INPUT)
                || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext))
                    && (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
            {
                psDynIndexOp = psOriginOp;
                *needsIndexCalcRevert = false;
            }
        }

        // Atm we support only this very basic case of dynamic indexing array of structs.
        // Return error if something else is encountered.
        if (!indexVarFound)
            psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true);
    }

    return psDynIndexOp;
}
|
989
third_party/HLSLcc/src/Shader.cpp
vendored
Normal file
989
third_party/HLSLcc/src/Shader.cpp
vendored
Normal file
@ -0,0 +1,989 @@
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include <algorithm>
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
|
||||
// Returns the entry for temp register ui32Reg from the size table that belongs to eType.
// Types without a table trigger the assert and yield 0.
uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const
{
    // Pick the table first, index it once at the end.
    const decltype(psFloatTempSizes) *sizeTable = 0;

    switch (eType)
    {
        case SVT_FLOAT:
            sizeTable = &psFloatTempSizes;
            break;
        case SVT_FLOAT16:
            sizeTable = &psFloat16TempSizes;
            break;
        case SVT_FLOAT10:
            sizeTable = &psFloat10TempSizes;
            break;
        case SVT_INT:
            sizeTable = &psIntTempSizes;
            break;
        case SVT_INT16:
            sizeTable = &psInt16TempSizes;
            break;
        case SVT_INT12:
            sizeTable = &psInt12TempSizes;
            break;
        case SVT_UINT:
            sizeTable = &psUIntTempSizes;
            break;
        case SVT_UINT16:
            sizeTable = &psUInt16TempSizes;
            break;
        case SVT_DOUBLE:
            sizeTable = &psDoubleTempSizes;
            break;
        case SVT_BOOL:
            sizeTable = &psBoolTempSizes;
            break;
        default:
            ASSERT(0);
            break;
    }

    if (sizeTable == 0)
        return 0;

    return (*sizeTable)[ui32Reg];
}
|
||||
|
||||
void Shader::ConsolidateHullTempVars()
|
||||
{
|
||||
uint32_t i, phase;
|
||||
uint32_t numTemps = 0;
|
||||
for (phase = 0; phase < asPhases.size(); phase++)
|
||||
{
|
||||
for (i = 0; i < asPhases[phase].psDecl.size(); i++)
|
||||
{
|
||||
if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS)
|
||||
{
|
||||
if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps)
|
||||
numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps;
|
||||
asPhases[phase].psDecl[i].value.ui32NumTemps = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now we have the max temps, write it back to the first one we see.
|
||||
for (phase = 0; phase < asPhases.size(); phase++)
|
||||
{
|
||||
for (i = 0; i < asPhases[phase].psDecl.size(); i++)
|
||||
{
|
||||
if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS)
|
||||
{
|
||||
asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing.
|
||||
// Go through all image declarations and instructions accessing it to see if it is readonly/writeonly.
|
||||
// While doing that we also get the number of components expected in the image format.
|
||||
// Also resolve access flags for other UAVs as well. No component count resolving for them.
|
||||
void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo)
|
||||
{
|
||||
Declaration *psFirstDeclaration = &psDecl[0];
|
||||
|
||||
uint32_t ui32NumDeclarations = (uint32_t)psDecl.size();
|
||||
Instruction *psFirstInstruction = &psInst[0];
|
||||
uint32_t ui32NumInstructions = (uint32_t)psInst.size();
|
||||
|
||||
if (ui32NumDeclarations == 0 || ui32NumInstructions == 0)
|
||||
return;
|
||||
|
||||
Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1;
|
||||
Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1;
|
||||
Declaration *psDecl;
|
||||
|
||||
for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++)
|
||||
{
|
||||
Instruction *psInst;
|
||||
uint32_t uavReg;
|
||||
if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED &&
|
||||
psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED &&
|
||||
psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW)
|
||||
continue;
|
||||
|
||||
uavReg = psDecl->asOperands[0].ui32RegisterNumber;
|
||||
|
||||
for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++)
|
||||
{
|
||||
uint32_t opIndex;
|
||||
uint32_t accessFlags;
|
||||
uint32_t numComponents;
|
||||
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_LD_UAV_TYPED:
|
||||
opIndex = 2;
|
||||
accessFlags = ACCESS_FLAG_READ;
|
||||
numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target
|
||||
break;
|
||||
|
||||
case OPCODE_STORE_UAV_TYPED:
|
||||
ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
|
||||
opIndex = 0;
|
||||
accessFlags = ACCESS_FLAG_WRITE;
|
||||
numComponents = 0; // store op does not contribute on the component count resolving
|
||||
break;
|
||||
|
||||
case OPCODE_ATOMIC_CMP_STORE:
|
||||
case OPCODE_ATOMIC_AND:
|
||||
case OPCODE_ATOMIC_IADD:
|
||||
case OPCODE_ATOMIC_OR:
|
||||
case OPCODE_ATOMIC_XOR:
|
||||
case OPCODE_ATOMIC_IMIN:
|
||||
case OPCODE_ATOMIC_UMIN:
|
||||
case OPCODE_ATOMIC_IMAX:
|
||||
case OPCODE_ATOMIC_UMAX:
|
||||
opIndex = 0;
|
||||
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
|
||||
numComponents = 1;
|
||||
break;
|
||||
|
||||
case OPCODE_IMM_ATOMIC_AND:
|
||||
case OPCODE_IMM_ATOMIC_IADD:
|
||||
case OPCODE_IMM_ATOMIC_IMAX:
|
||||
case OPCODE_IMM_ATOMIC_IMIN:
|
||||
case OPCODE_IMM_ATOMIC_UMAX:
|
||||
case OPCODE_IMM_ATOMIC_UMIN:
|
||||
case OPCODE_IMM_ATOMIC_OR:
|
||||
case OPCODE_IMM_ATOMIC_XOR:
|
||||
case OPCODE_IMM_ATOMIC_EXCH:
|
||||
case OPCODE_IMM_ATOMIC_CMP_EXCH:
|
||||
opIndex = 1;
|
||||
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
|
||||
numComponents = 1;
|
||||
break;
|
||||
|
||||
// The rest of the ops here are only for buffer UAVs. No need for component count resolving.
|
||||
case OPCODE_LD_STRUCTURED:
|
||||
opIndex = 3;
|
||||
accessFlags = ACCESS_FLAG_READ;
|
||||
numComponents = 0;
|
||||
break;
|
||||
|
||||
case OPCODE_STORE_STRUCTURED:
|
||||
opIndex = 0;
|
||||
accessFlags = ACCESS_FLAG_WRITE;
|
||||
numComponents = 0;
|
||||
break;
|
||||
|
||||
case OPCODE_LD_RAW:
|
||||
opIndex = 2;
|
||||
accessFlags = ACCESS_FLAG_READ;
|
||||
numComponents = 0;
|
||||
break;
|
||||
|
||||
case OPCODE_STORE_RAW:
|
||||
opIndex = 0;
|
||||
accessFlags = ACCESS_FLAG_WRITE;
|
||||
numComponents = 0;
|
||||
break;
|
||||
|
||||
case OPCODE_IMM_ATOMIC_ALLOC:
|
||||
case OPCODE_IMM_ATOMIC_CONSUME:
|
||||
opIndex = 1;
|
||||
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
|
||||
numComponents = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
// Buffer loads can also happen on non-uav. Skip those.
|
||||
if (psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW)
|
||||
continue;
|
||||
|
||||
// Check the instruction is operating on the declared uav
|
||||
if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg)
|
||||
continue;
|
||||
|
||||
psDecl->sUAV.ui32AccessFlags |= accessFlags;
|
||||
|
||||
// get the max components accessed, but only for typed (texture) UAVs
|
||||
if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED)
|
||||
{
|
||||
psDecl->sUAV.ui32NumComponents = numComponents;
|
||||
}
|
||||
}
|
||||
|
||||
if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED)
|
||||
{
|
||||
const ResourceBinding* psBinding = 0;
|
||||
if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding))
|
||||
{
|
||||
// component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4
|
||||
psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable)
|
||||
{
|
||||
int i;
|
||||
uint32_t reg;
|
||||
for (i = 0; i < MAX_SUB_OPERANDS; i++)
|
||||
{
|
||||
if (psOperand->m_SubOperands[i].get())
|
||||
GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable);
|
||||
}
|
||||
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier
|
||||
|
||||
destTable[reg] |= (char)psOperand->GetAccessMask();
|
||||
}
|
||||
|
||||
// Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate
|
||||
static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps)
|
||||
{
|
||||
// Just move all operations back to their original registers, but keep the data type assignments.
|
||||
uint32_t i, k;
|
||||
Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1];
|
||||
std::vector<char> opAccessMasks;
|
||||
|
||||
// First move all newly created temps to high enough so they won't overlap with the rebased ones
|
||||
|
||||
Instruction *inst = &psPhase->psInst[0];
|
||||
|
||||
if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0)
|
||||
return;
|
||||
|
||||
while (inst <= psLastInstruction)
|
||||
{
|
||||
// Update all operands and their suboperands
|
||||
for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++)
|
||||
{
|
||||
for (k = 0; k < inst->ui32NumOperands; k++)
|
||||
inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0);
|
||||
}
|
||||
inst++;
|
||||
}
|
||||
|
||||
// Prune the original registers, rebase if necessary
|
||||
opAccessMasks.clear();
|
||||
opAccessMasks.resize(psPhase->ui32TotalTemps, 0);
|
||||
inst = &psPhase->psInst[0];
|
||||
while (inst <= psLastInstruction)
|
||||
{
|
||||
for (k = 0; k < inst->ui32NumOperands; k++)
|
||||
GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]);
|
||||
inst++;
|
||||
}
|
||||
|
||||
for (i = 0; i < psPhase->ui32TotalTemps; i++)
|
||||
{
|
||||
uint32_t rebase, count;
|
||||
uint32_t newReg = i;
|
||||
uint32_t origReg = i;
|
||||
int needsMoving = 0;
|
||||
SHADER_VARIABLE_TYPE dataType;
|
||||
|
||||
// Figure out rebase and count
|
||||
rebase = 0;
|
||||
count = 0;
|
||||
if (i < psPhase->ui32OrigTemps)
|
||||
{
|
||||
// One of the original registers
|
||||
k = opAccessMasks[i];
|
||||
if (k == 0)
|
||||
continue;
|
||||
|
||||
while ((k & 1) == 0)
|
||||
{
|
||||
rebase++;
|
||||
k = k >> 1;
|
||||
}
|
||||
while (k != 0)
|
||||
{
|
||||
count++;
|
||||
k = k >> 1;
|
||||
}
|
||||
newReg = i + ui32MaxOrigTemps * rebase;
|
||||
if (rebase != 0)
|
||||
needsMoving = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Newly created split registers, read info from table
|
||||
// Read the count and rebase from split info table
|
||||
count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff;
|
||||
rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff;
|
||||
origReg = 0x10000 + i;
|
||||
newReg = (psPhase->pui32SplitInfo[i]) & 0xffff;
|
||||
while (psPhase->pui32SplitInfo[newReg] != 0xffffffff)
|
||||
newReg = (psPhase->pui32SplitInfo[newReg]) & 0xffff;
|
||||
|
||||
// If count is 4, verify that we have both first and last bit set
|
||||
ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9);
|
||||
|
||||
newReg = newReg + ui32MaxOrigTemps * rebase;
|
||||
|
||||
// Don't rebase again
|
||||
rebase = 0;
|
||||
needsMoving = 1;
|
||||
}
|
||||
|
||||
if (needsMoving)
|
||||
{
|
||||
// printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase);
|
||||
|
||||
// Move directly to correct location
|
||||
inst = &psPhase->psInst[0];
|
||||
while (inst <= psLastInstruction)
|
||||
{
|
||||
for (k = 0; k < inst->ui32NumOperands; k++)
|
||||
inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase);
|
||||
inst++;
|
||||
}
|
||||
}
|
||||
// Mark the count
|
||||
dataType = psPhase->peTempTypes[i * 4 + rebase];
|
||||
switch (dataType)
|
||||
{
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
case SVT_BOOL:
|
||||
psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_FLOAT:
|
||||
psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_FLOAT16:
|
||||
psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_FLOAT10:
|
||||
psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_INT:
|
||||
psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_INT16:
|
||||
psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_INT12:
|
||||
psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_UINT:
|
||||
psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_UINT16:
|
||||
psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count);
|
||||
break;
|
||||
case SVT_DOUBLE:
|
||||
psShader->psDoubleTempSizes[newReg] = std::max(psShader->psDoubleTempSizes[newReg], (char)count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mark whether the temp registers are used per each data type.
|
||||
void Shader::PruneTempRegisters()
|
||||
{
|
||||
uint32_t k;
|
||||
uint32_t maxOrigTemps = 0;
|
||||
uint32_t maxTotalTemps = 0;
|
||||
// First find the total amount of temps
|
||||
for (k = 0; k < asPhases.size(); k++)
|
||||
{
|
||||
ShaderPhase *psPhase = &asPhases[k];
|
||||
maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps);
|
||||
maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps);
|
||||
}
|
||||
|
||||
if (maxTotalTemps == 0)
|
||||
return; // splitarrays are nulls, no need to free
|
||||
|
||||
// Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase
|
||||
psIntTempSizes.clear();
|
||||
psIntTempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psInt12TempSizes.clear();
|
||||
psInt12TempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psInt16TempSizes.clear();
|
||||
psInt16TempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psUIntTempSizes.clear();
|
||||
psUIntTempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psUInt16TempSizes.clear();
|
||||
psUInt16TempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psFloatTempSizes.clear();
|
||||
psFloatTempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psFloat16TempSizes.clear();
|
||||
psFloat16TempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psFloat10TempSizes.clear();
|
||||
psFloat10TempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psDoubleTempSizes.clear();
|
||||
psDoubleTempSizes.resize(maxOrigTemps * 4, 0);
|
||||
psBoolTempSizes.clear();
|
||||
psBoolTempSizes.resize(maxOrigTemps * 4, 0);
|
||||
|
||||
for (k = 0; k < asPhases.size(); k++)
|
||||
{
|
||||
ShaderPhase *psPhase = &asPhases[k];
|
||||
CoalesceTemps(this, psPhase, maxOrigTemps);
|
||||
if (psPhase->psTempDeclaration)
|
||||
psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds, per register, a byte holding 2 bits per component (x = bits 0-1, y = bits 2-3, ...).
// Each 2-bit field records which signature on that register covers the component:
//   0: unused OR used by the first signature seen for the register
//   1: used by the second signature
//   2: used by the third signature
//   3: used by the fourth signature
// outTable is cleared first; it starts at 8 registers and grows as needed.
static void DoSignatureAnalysis(std::vector<ShaderInfo::InOutSignature> &psSignatures, std::vector<unsigned char> &outTable)
{
    const size_t kInitialRegisters = 8;

    // How many signatures have been seen so far on each register.
    std::vector<unsigned char> seenPerRegister(kInitialRegisters, static_cast<unsigned char>(0));
    outTable.assign(kInitialRegisters, static_cast<unsigned char>(0));

    for (ShaderInfo::InOutSignature &sig : psSignatures)
    {
        const uint32_t reg = sig.ui32Register;

        // SV_Depth and others put -1 into the register field; skip those.
        if (reg == 0xffffffffu)
            continue;

        // Grow both tables together so that the register index is valid.
        if (reg >= seenPerRegister.size())
        {
            seenPerRegister.resize(reg * 2, 0);
            outTable.resize(reg * 2, 0);
        }

        // Replicate the counter into all four 2-bit fields (only the low 8 bits are kept).
        const unsigned int ordinal = seenPerRegister[reg];
        const unsigned char ordinalAllLanes = static_cast<unsigned char>(ordinal | (ordinal << 2) | (ordinal << 4) | (ordinal << 6));

        // Spread the 4-bit component mask so each component owns two adjacent bits.
        const unsigned int compMask = static_cast<unsigned char>(sig.ui32Mask);
        unsigned int laneMask = ((compMask & 8) << 3) | ((compMask & 4) << 2) | ((compMask & 2) << 1) | (compMask & 1);
        laneMask |= (laneMask << 1);

        outTable[reg] |= static_cast<unsigned char>(ordinalAllLanes & laneMask);
        seenPerRegister[reg]++;
    }
}
|
||||
|
||||
// Checks one operand (and, recursively, its suboperands) against the redirect table of the
// register space it addresses. If the components the operand accesses belong to different
// signatures, the table entry for that register is set to 0xff.
//
// psPhase   - phase owning the redirect tables that are read and updated.
// psOperand - operand to inspect; operands that are not inputs or outputs are ignored.
void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand)
{
    const uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase);

    for (uint32_t sub = 0; sub < MAX_SUB_OPERANDS; sub++)
    {
        if (psOperand->m_SubOperands[sub].get())
            DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[sub].get());
    }

    // Register space 0 uses the regular input/output table, anything else the patch constant table.
    std::vector<unsigned char> *redirectTable = NULL;
    switch (psOperand->eType)
    {
        case OPERAND_TYPE_INPUT:
        case OPERAND_TYPE_INPUT_CONTROL_POINT:
        case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
            redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect : &psPhase->acPatchConstantsNeedsRedirect;
            break;

        case OPERAND_TYPE_OUTPUT:
        case OPERAND_TYPE_OUTPUT_CONTROL_POINT:
            redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect : &psPhase->acPatchConstantsNeedsRedirect;
            break;

        default:
            // Not an input or output, nothing to do here
            return;
    }

    // A register with no table entry cannot be checked; bail out instead of reading out of bounds.
    if (psOperand->ui32RegisterNumber >= redirectTable->size())
        return;

    unsigned char &entry = (*redirectTable)[psOperand->ui32RegisterNumber];
    const unsigned char redir = entry;

    if (redir == 0xff) // Already found overlap?
        return;

    // Only the four component bits are meaningful. Without any of them set the
    // first-bit search below would never terminate, so there is nothing to compare.
    const uint32_t mask = psOperand->GetAccessMask() & 0xFu;
    if (mask == 0)
        return;

    // Find the first mask bit set.
    uint32_t comp = 0;
    while ((mask & (1u << comp)) == 0)
        comp++;

    const unsigned char firstFound = (redir >> (comp * 2)) & 3;
    for (; comp < 4; comp++)
    {
        if ((mask & (1u << comp)) == 0)
            continue;

        // All set bits must access the same signature
        const unsigned char sig = (redir >> (comp * 2)) & 3;
        if (sig != firstFound)
        {
            entry = 0xff;
            return;
        }
    }
}
|
||||
|
||||
// Keeps an entry only if it holds the 0xff marker; every other value is reset to 0.
static void PruneRedirectEntry(unsigned char &itr)
{
    const unsigned char kOverlapMarker = 0xff;
    itr = (itr == kOverlapMarker) ? kOverlapMarker : static_cast<unsigned char>(0);
}
|
||||
|
||||
// Check if inputs and outputs are accessed across semantic boundaries
|
||||
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
|
||||
void Shader::AnalyzeIOOverlap()
|
||||
{
|
||||
uint32_t i, k;
|
||||
std::vector<unsigned char> outData;
|
||||
DoSignatureAnalysis(sInfo.psInputSignatures, outData);
|
||||
|
||||
// Now data has the values, copy them to all phases
|
||||
for (i = 0; i < asPhases.size(); i++)
|
||||
asPhases[i].acInputNeedsRedirect = outData;
|
||||
|
||||
DoSignatureAnalysis(sInfo.psOutputSignatures, outData);
|
||||
for (i = 0; i < asPhases.size(); i++)
|
||||
asPhases[i].acOutputNeedsRedirect = outData;
|
||||
|
||||
DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData);
|
||||
for (i = 0; i < asPhases.size(); i++)
|
||||
asPhases[i].acPatchConstantsNeedsRedirect = outData;
|
||||
|
||||
// Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise)
|
||||
// if we're crossing signature borders
|
||||
for (i = 0; i < asPhases.size(); i++)
|
||||
{
|
||||
ShaderPhase *psPhase = &asPhases[i];
|
||||
for (k = 0; k < psPhase->psInst.size(); k++)
|
||||
{
|
||||
Instruction *psInst = &psPhase->psInst[k];
|
||||
uint32_t j;
|
||||
for (j = 0; j < psInst->ui32NumOperands; j++)
|
||||
DoIOOverlapOperand(psPhase, &psInst->asOperands[j]);
|
||||
}
|
||||
|
||||
// Now prune all tables from anything except 0xff.
|
||||
std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry);
|
||||
std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry);
|
||||
std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry);
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::SetMaxSemanticIndex()
|
||||
{
|
||||
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it)
|
||||
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
|
||||
|
||||
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it)
|
||||
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
|
||||
|
||||
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it)
|
||||
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
|
||||
}
|
||||
|
||||
// In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array.
|
||||
// Luckily, each chunk is always accessed with suboperand plus <constant> (in ui32RegisterNumber)
|
||||
// So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read.
|
||||
void ShaderPhase::PruneConstArrays()
|
||||
{
|
||||
using namespace std;
|
||||
auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; });
|
||||
// Not found? We're done.
|
||||
if (customDataItr == psDecl.end())
|
||||
return;
|
||||
|
||||
// Store the original declaration
|
||||
m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr);
|
||||
|
||||
// Loop through each operand and pick up usage masks
|
||||
HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector<Instruction>::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
using namespace std;
|
||||
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER)
|
||||
{
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
uint32_t offset = psOperand->ui32RegisterNumber;
|
||||
|
||||
// Update the chunk access mask
|
||||
|
||||
// Find all existing entries that have anything common with the access mask
|
||||
auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset);
|
||||
vector<ChunkMap::iterator> matchingEntries;
|
||||
for (auto itr = cbrange.first; itr != cbrange.second; itr++)
|
||||
{
|
||||
if ((itr->second.m_AccessMask & accessMask) != 0)
|
||||
{
|
||||
matchingEntries.push_back(itr);
|
||||
}
|
||||
}
|
||||
|
||||
if (matchingEntries.empty())
|
||||
{
|
||||
// Not found, create new entry
|
||||
m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand)));
|
||||
}
|
||||
else if (matchingEntries.size() == 1)
|
||||
{
|
||||
// Update access mask of the one existing entry
|
||||
matchingEntries[0]->second.m_AccessMask |= accessMask;
|
||||
matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Multiple entries with (now) overlapping mask. Merge to the first one.
|
||||
ChunkMap::iterator tgt = matchingEntries[0];
|
||||
tgt->second.m_AccessMask |= accessMask;
|
||||
tgt->second.m_UseSites.push_back((Operand *)psOperand);
|
||||
ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks;
|
||||
for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr)
|
||||
{
|
||||
tgt->second.m_AccessMask |= itr->second.m_AccessMask;
|
||||
chunks.erase(itr);
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found)
|
||||
|
||||
uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size();
|
||||
for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++)
|
||||
{
|
||||
// Find the next chunk that shares any bits in the access mask
|
||||
auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr)
|
||||
{
|
||||
return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0;
|
||||
});
|
||||
|
||||
// Not found? Must continue until the end of array
|
||||
if (nextItr == m_ConstantArrayInfo.m_Chunks.end())
|
||||
chunk->second.m_Size = totalSize - chunk->first;
|
||||
else
|
||||
{
|
||||
// Otherwise we know the chunk size directly.
|
||||
chunk->second.m_Size = nextItr->first - chunk->first;
|
||||
}
|
||||
|
||||
// Do rebase on the operands if necessary
|
||||
chunk->second.m_Rebase = 0;
|
||||
uint32_t t = chunk->second.m_AccessMask;
|
||||
ASSERT(t != 0);
|
||||
while ((t & 1) == 0)
|
||||
{
|
||||
chunk->second.m_Rebase++;
|
||||
t >>= 1;
|
||||
}
|
||||
uint32_t rebase = chunk->second.m_Rebase;
|
||||
uint32_t componentCount = 0;
|
||||
while (t != 0)
|
||||
{
|
||||
componentCount++;
|
||||
t >>= 1;
|
||||
}
|
||||
chunk->second.m_ComponentCount = componentCount;
|
||||
|
||||
for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op)
|
||||
{
|
||||
// Store the rebase value to each operand and do the actual rebase.
|
||||
op->m_Rebase = rebase;
|
||||
op->m_Size = componentCount;
|
||||
|
||||
if (rebase != 0)
|
||||
{
|
||||
// Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
|
||||
switch (op->eSelMode)
|
||||
{
|
||||
case OPERAND_4_COMPONENT_MASK_MODE:
|
||||
{
|
||||
uint32_t oldMask = op->ui32CompMask;
|
||||
if (oldMask == 0)
|
||||
oldMask = OPERAND_4_COMPONENT_MASK_ALL;
|
||||
|
||||
// Check that we're not losing any information
|
||||
ASSERT((oldMask >> rebase) << rebase == oldMask);
|
||||
op->ui32CompMask = (oldMask >> rebase);
|
||||
break;
|
||||
}
|
||||
case OPERAND_4_COMPONENT_SELECT_1_MODE:
|
||||
ASSERT(op->aui32Swizzle[0] >= rebase);
|
||||
op->aui32Swizzle[0] -= rebase;
|
||||
break;
|
||||
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
// Note that this rebase is different from the one done for source operands
|
||||
ASSERT(op->aui32Swizzle[i] >= rebase);
|
||||
op->aui32Swizzle[i] -= rebase;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// We'll do the actual declaration and pruning later on, now that we have the info stored up.
|
||||
}
|
||||
|
||||
// Returns the control flow graph of this phase, building it from the current
// instruction list on the first call and reusing it afterwards.
HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG()
{
    if (m_CFGInitialized)
        return m_CFG;

    auto *psFirst = psInst.data();
    m_CFG.Build(psFirst, psFirst + psInst.size());
    m_CFGInitialized = true;
    return m_CFG;
}
|
||||
|
||||
void ShaderPhase::UnvectorizeImmMoves()
|
||||
{
|
||||
// NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore
|
||||
// (we insert new instructions there)
|
||||
using namespace std;
|
||||
vector<Instruction> nInst;
|
||||
// Reserve 1.5x space
|
||||
nInst.reserve(psInst.size() * 3 / 2);
|
||||
|
||||
for_each(psInst.begin(), psInst.end(), [&](Instruction &i)
|
||||
{
|
||||
if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1)
|
||||
{
|
||||
nInst.push_back(i);
|
||||
return;
|
||||
}
|
||||
// Ok, found one to unvectorize.
|
||||
ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE);
|
||||
uint32_t mask = i.asOperands[0].ui32CompMask;
|
||||
for (uint32_t j = 0; j < 4; j++)
|
||||
{
|
||||
if ((mask & (1 << j)) == 0)
|
||||
continue;
|
||||
|
||||
Instruction ni = i;
|
||||
ni.asOperands[0].ui32CompMask = (1 << j);
|
||||
nInst.push_back(ni);
|
||||
}
|
||||
});
|
||||
psInst.clear();
|
||||
psInst.swap(nInst);
|
||||
}
|
||||
|
||||
void ShaderPhase::ExpandSWAPCs()
|
||||
{
|
||||
// First find the DCL_TEMPS declaration
|
||||
auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; });
|
||||
if (dcitr == psDecl.end())
|
||||
{
|
||||
// No temp declaration? Probably we won't have SWAPC either, then.
|
||||
return;
|
||||
}
|
||||
Declaration &tmpDecl = *dcitr;
|
||||
|
||||
uint32_t extraTemp = 0;
|
||||
bool extraTempAllocated = false;
|
||||
|
||||
// Parse through instructions, open up SWAPCs if necessary
|
||||
while (1)
|
||||
{
|
||||
// Need to find from top every time, because we're inserting stuff into the vector
|
||||
auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; });
|
||||
if (swapItr == psInst.end())
|
||||
break;
|
||||
|
||||
// Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN):
|
||||
|
||||
/* swapc dest0[.mask],
|
||||
dest1[.mask],
|
||||
src0[.swizzle],
|
||||
src1[.swizzle],
|
||||
src2[.swizzle]
|
||||
|
||||
expands to :
|
||||
|
||||
movc temp[dest0s mask],
|
||||
src0[.swizzle],
|
||||
src2[.swizzle], src1[.swizzle]
|
||||
|
||||
movc dest1[.mask],
|
||||
src0[.swizzle],
|
||||
src1[.swizzle], src2[.swizzle]
|
||||
|
||||
mov dest0.mask, temp
|
||||
*/
|
||||
// Allocate a new temp, if not already done
|
||||
if (!extraTempAllocated)
|
||||
{
|
||||
extraTemp = tmpDecl.value.ui32NumTemps++;
|
||||
extraTempAllocated = true;
|
||||
}
|
||||
|
||||
Instruction origSwapInst;
|
||||
#if _DEBUG
|
||||
origSwapInst.id = swapItr->id;
|
||||
#endif
|
||||
std::swap(*swapItr, origSwapInst); // Store the original swapc for reading
|
||||
|
||||
// OP 1: MOVC temp[dest0 mask], src0, src2, stc1
|
||||
swapItr->eOpcode = OPCODE_MOVC;
|
||||
swapItr->ui32NumOperands = 4;
|
||||
swapItr->ui32FirstSrc = 1;
|
||||
swapItr->asOperands[0] = origSwapInst.asOperands[0];
|
||||
swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP;
|
||||
swapItr->asOperands[0].ui32RegisterNumber = extraTemp;
|
||||
// mask is already fine
|
||||
swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0
|
||||
swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2
|
||||
swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1
|
||||
// swapItr is already in the psInst vector.
|
||||
|
||||
Instruction newInst[2] = { Instruction(), Instruction() };
|
||||
// OP 2: MOVC dest1, src0, src1, src2
|
||||
newInst[0].eOpcode = OPCODE_MOVC;
|
||||
newInst[0].ui32NumOperands = 4;
|
||||
newInst[0].ui32FirstSrc = 1;
|
||||
newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1
|
||||
newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0
|
||||
newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1
|
||||
newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2
|
||||
#if _DEBUG
|
||||
newInst[0].id = swapItr->id;
|
||||
#endif
|
||||
|
||||
// OP 3: mov dest0.mask, temp
|
||||
newInst[1].eOpcode = OPCODE_MOV;
|
||||
newInst[1].ui32NumOperands = 2;
|
||||
newInst[1].ui32FirstSrc = 1;
|
||||
newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0
|
||||
// First copy dest0 to src as well to get the mask set up correctly
|
||||
newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0;
|
||||
// Then overwrite with temp reg
|
||||
newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP;
|
||||
newInst[1].asOperands[1].ui32RegisterNumber = extraTemp;
|
||||
#if _DEBUG
|
||||
newInst[1].id = swapItr->id;
|
||||
#endif
|
||||
|
||||
// Insert the new instructions to the vector
|
||||
psInst.insert(swapItr + 1, newInst, newInst + 2);
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::ExpandSWAPCs()
|
||||
{
|
||||
// Just call ExpandSWAPCs for each phase
|
||||
for (int i = 0; i < asPhases.size(); i++)
|
||||
{
|
||||
asPhases[i].ExpandSWAPCs();
|
||||
}
|
||||
}
|
||||
|
||||
void Shader::ForcePositionToHighp()
|
||||
{
|
||||
// Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?)
|
||||
if (eShaderType != VERTEX_SHADER)
|
||||
return;
|
||||
|
||||
ShaderPhase &phase = asPhases[0];
|
||||
|
||||
// Find the output declaration
|
||||
std::vector<Declaration>::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool
|
||||
{
|
||||
if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV)
|
||||
{
|
||||
const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName;
|
||||
if (specialName == NAME_POSITION ||
|
||||
specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird).
|
||||
{
|
||||
const ShaderInfo::InOutSignature *sig = NULL;
|
||||
sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig);
|
||||
ASSERT(sig != NULL);
|
||||
if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0)
|
||||
{
|
||||
((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
else if (decl.eOpcode == OPCODE_DCL_OUTPUT)
|
||||
{
|
||||
const ShaderInfo::InOutSignature *sig = NULL;
|
||||
sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig);
|
||||
ASSERT(sig != NULL);
|
||||
if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0)
|
||||
{
|
||||
((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
// Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc.
|
||||
if (itr == phase.psDecl.end())
|
||||
return;
|
||||
|
||||
uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber;
|
||||
|
||||
HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector<Instruction>::iterator itr, Operand *op, uint32_t flags)
|
||||
{
|
||||
if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg)
|
||||
op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT;
|
||||
});
|
||||
}
|
||||
|
||||
void Shader::FindUnusedGlobals(uint32_t flags)
|
||||
{
|
||||
for (int i = 0; i < asPhases.size(); i++)
|
||||
{
|
||||
ShaderPhase &phase = asPhases[i];
|
||||
|
||||
// Loop through every operand and pick up usages
|
||||
HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, [&](std::vector<Instruction>::iterator inst, Operand *op, uint32_t flags)
|
||||
{
|
||||
// Not a constant buffer read? continue
|
||||
if (op->eType != OPERAND_TYPE_CONSTANT_BUFFER)
|
||||
return;
|
||||
|
||||
const uint32_t ui32BindingPoint = op->aui32ArraySizes[0];
|
||||
const ConstantBuffer *psCBuf = NULL;
|
||||
sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf);
|
||||
|
||||
if (!psCBuf)
|
||||
return;
|
||||
|
||||
// Get all the struct members that can be reached from this usage:
|
||||
uint32_t mask = op->GetAccessMask();
|
||||
for (uint32_t k = 0; k < 4; k++)
|
||||
{
|
||||
if ((mask & (1 << k)) == 0)
|
||||
continue;
|
||||
|
||||
uint32_t tmpSwizzle[4] = {k, k, k, k};
|
||||
int rebase;
|
||||
bool isArray;
|
||||
|
||||
ShaderVarType *psVarType = NULL;
|
||||
|
||||
ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags);
|
||||
|
||||
// Mark as used. Also all parents.
|
||||
while (psVarType)
|
||||
{
|
||||
psVarType->m_IsUsed = true;
|
||||
psVarType = psVarType->Parent;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
520
third_party/HLSLcc/src/ShaderInfo.cpp
vendored
Normal file
520
third_party/HLSLcc/src/ShaderInfo.cpp
vendored
Normal file
@ -0,0 +1,520 @@
|
||||
#include "ShaderInfo.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "Operand.h"
|
||||
#include <stdlib.h>
|
||||
#include <sstream>
|
||||
#include <cctype>
|
||||
|
||||
|
||||
// Returns the data type of the texture resource bound at regNo.
// A binding is expected to exist; the lookup result is only checked with ASSERT.
SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
{
    const ResourceBinding* psTexture = 0;
    const int wasFound = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psTexture);
    ASSERT(wasFound != 0);
    return psTexture->GetDataType();
}
|
||||
|
||||
void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const
|
||||
{
|
||||
ASSERT(ui32MajorVersion > 3);
|
||||
*ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]];
|
||||
}
|
||||
|
||||
int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumBindings = psResourceBindings.size();
|
||||
const ResourceBinding* psBindings = &psResourceBindings[0];
|
||||
|
||||
for (i = 0; i < ui32NumBindings; ++i)
|
||||
{
|
||||
if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup)
|
||||
{
|
||||
if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount))
|
||||
{
|
||||
*ppsOutBinding = psBindings + i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset &&
|
||||
ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size))
|
||||
{
|
||||
*ppsShaderVar = &psThisPointerConstBuffer->asVars[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psInputSignatures.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0))
|
||||
{
|
||||
*ppsOut = &psInputSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ASSERT(allowNull);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psPatchConstantSignatures.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0))
|
||||
{
|
||||
*ppsOut = &psPatchConstantSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
|
||||
// In those situations just take the last signature that uses that register (it's typically the "highest" one)
|
||||
for (i = ui32NumVars - 1; i-- > 0;)
|
||||
{
|
||||
if (ui32Register == psPatchConstantSignatures[i].ui32Register)
|
||||
{
|
||||
*ppsOut = &psPatchConstantSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(allowNull);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register,
|
||||
const uint32_t ui32CompMask,
|
||||
const uint32_t ui32Stream,
|
||||
const InOutSignature** ppsOut,
|
||||
bool allowNull /* = false */) const
|
||||
{
|
||||
size_t i;
|
||||
const size_t ui32NumVars = psOutputSignatures.size();
|
||||
ASSERT(ui32CompMask != 0);
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
if (ui32Register == psOutputSignatures[i].ui32Register &&
|
||||
(ui32CompMask & psOutputSignatures[i].ui32Mask) &&
|
||||
ui32Stream == psOutputSignatures[i].ui32Stream)
|
||||
{
|
||||
*ppsOut = &psOutputSignatures[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ASSERT(allowNull);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Finds the first output signature with the given system value type and semantic index.
// Returns 1 and stores it in *ppsOut when found; a miss asserts and returns 0.
int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const
{
    for (const InOutSignature &sig : psOutputSignatures)
    {
        const bool sameSystemValue = (eSystemValueType == sig.eSystemValueType);
        if (sameSystemValue && ui32SemanticIndex == sig.ui32SemanticIndex)
        {
            *ppsOut = &sig;
            return 1;
        }
    }

    ASSERT(0);
    return 0;
}
|
||||
|
||||
uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize)
|
||||
{
|
||||
// Default is regular matrices, vectors and scalars
|
||||
uint32_t size = psType->Columns * psType->Rows * 4;
|
||||
|
||||
// Struct size is calculated from the offset and size of its last member.
|
||||
// Need to take into account that members could be arrays.
|
||||
if (psType->Class == SVC_STRUCT)
|
||||
{
|
||||
size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true);
|
||||
}
|
||||
// Matrices represented as vec4 arrays have special size calculation
|
||||
else if (matrixAsVectors)
|
||||
{
|
||||
if (psType->Class == SVC_MATRIX_ROWS)
|
||||
{
|
||||
size = psType->Rows * 16;
|
||||
}
|
||||
else if (psType->Class == SVC_MATRIX_COLUMNS)
|
||||
{
|
||||
size = psType->Columns * 16;
|
||||
}
|
||||
}
|
||||
|
||||
if (wholeArraySize && psType->Elements > 1)
|
||||
{
|
||||
uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size
|
||||
size = (psType->Elements - 1) * paddedSize + size; // Except the last element
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
// Checks whether the byte offset offsetToFind falls inside psType (placed at parentOffset
// plus its own Offset) and, if so, returns the innermost type that contains it.
//
// psType       - type to test; struct members are searched recursively.
// parentOffset - byte offset of the enclosing type.
// offsetToFind - byte offset being looked up.
// isArray      - out: set to true when the match is a matrix or an array of scalars/vectors,
//                false for any other match. Untouched when there is no match.
// arrayIndices - optional out: array indices are appended in order from the outermost
//                parent to the found type. May be NULL.
// pi32Rebase   - out: pi32Rebase[0] receives the byte offset within its vec4 of a
//                non-array vector match. Untouched otherwise.
// flags        - HLSLCC_FLAG_TRANSLATE_MATRICES selects matrix-as-vectors sizing.
//
// Returns the containing type, or NULL when the offset lies outside psType.
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
    uint32_t parentOffset,
    uint32_t offsetToFind,
    bool* isArray,
    std::vector<uint32_t>* arrayIndices,
    int32_t* pi32Rebase,
    uint32_t flags)
{
    uint32_t thisOffset = parentOffset + psType->Offset;
    uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
    uint32_t paddedSize = ((thisSize + 15) / 16) * 16;
    uint32_t arraySize = thisSize;

    // Array elements are padded to align on vec4 size, except for the last one
    if (psType->Elements)
        arraySize = (paddedSize * (psType->Elements - 1)) + thisSize;

    if ((offsetToFind >= thisOffset) &&
        offsetToFind < (thisOffset + arraySize))
    {
        *isArray = false;
        if (psType->Class == SVC_STRUCT)
        {
            // Elements are laid out paddedSize bytes apart (see arraySize above), so the
            // element index must use the padded stride as well. Dividing by the unpadded
            // size gives a wrong index whenever the struct size is not a multiple of 16.
            if (psType->Elements > 1 && arrayIndices != NULL)
                arrayIndices->push_back((offsetToFind - thisOffset) / paddedSize);

            // Need to bring offset back to element zero in case of array of structs
            uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize;
            uint32_t m = 0;

            for (m = 0; m < psType->MemberCount; ++m)
            {
                const ShaderVarType* psMember = &psType->Members[m];

                const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags);
                if (foundType != NULL)
                    return foundType;
            }
        }
        // Check for array of scalars or vectors (both take up 16 bytes per element).
        // Matrices are also treated as arrays of vectors.
        else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) ||
                 ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1))
        {
            *isArray = true;
            if (arrayIndices != NULL)
                arrayIndices->push_back((offsetToFind - thisOffset) / 16);
        }
        else if (psType->Class == SVC_VECTOR)
        {
            // Check for vector starting at a non-vec4 offset.

            // cbuffer $Globals
            // {
            //
            //   float angle;                       // Offset:    0 Size:     4
            //   float2 angle2;                     // Offset:    4 Size:     8
            //
            // }

            // cb0[0].x = angle
            // cb0[0].yzyy = angle2.xyxx

            // Rebase angle2 so that .y maps to .x, .z maps to .y

            pi32Rebase[0] = thisOffset % 16;
        }

        return psType;
    }
    return NULL;
}
|
||||
|
||||
int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
|
||||
const uint32_t(&pui32Swizzle)[4],
|
||||
const ConstantBuffer* psCBuf,
|
||||
const ShaderVarType** ppsShaderVar, // Output the found var
|
||||
bool* isArray, // Output bool that tells if the found var is an array
|
||||
std::vector<uint32_t>* arrayIndices, // Output vector of array indices in order from root parent to the found var
|
||||
int32_t* pi32Rebase, // Output swizzle rebase
|
||||
uint32_t flags)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
uint32_t ui32ByteOffset = ui32Vec4Offset * 16;
|
||||
|
||||
//Swizzle can point to another variable. In the example below
|
||||
//cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined
|
||||
//into vectors. psCBuf->ui32NumVars will be 3.
|
||||
|
||||
// cbuffer cbUIUpdates
|
||||
// {
|
||||
// float g_fLifeSpan; // Offset: 0 Size: 4
|
||||
// float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused]
|
||||
// float g_fRadiusMin; // Offset: 8 Size: 4 [unused]
|
||||
// float g_fRadiusMax; // Offset: 12 Size: 4 [unused]
|
||||
// float g_fGrowTime; // Offset: 16 Size: 4 [unused]
|
||||
// float g_fStepSize; // Offset: 20 Size: 4
|
||||
// float g_fTurnRate; // Offset: 24 Size: 4
|
||||
// float g_fTurnSpeed; // Offset: 28 Size: 4 [unused]
|
||||
// float g_fLeafRate; // Offset: 32 Size: 4
|
||||
// float g_fShrinkTime; // Offset: 36 Size: 4 [unused]
|
||||
// uint g_uMaxFaces; // Offset: 40 Size: 4
|
||||
// }
|
||||
if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y)
|
||||
{
|
||||
ui32ByteOffset += 4;
|
||||
}
|
||||
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z)
|
||||
{
|
||||
ui32ByteOffset += 8;
|
||||
}
|
||||
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W)
|
||||
{
|
||||
ui32ByteOffset += 12;
|
||||
}
|
||||
|
||||
const size_t ui32NumVars = psCBuf->asVars.size();
|
||||
|
||||
for (i = 0; i < ui32NumVars; ++i)
|
||||
{
|
||||
ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags);
|
||||
|
||||
if (ppsShaderVar[0] != NULL)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
|
||||
// Searches for brackets and inserts indices one by one.
|
||||
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
size_t prevpos = 0;
|
||||
size_t pos = psShaderVar->fullName.find('[', 0);
|
||||
uint32_t i = 0;
|
||||
while (pos != std::string::npos)
|
||||
{
|
||||
pos++;
|
||||
oss << psShaderVar->fullName.substr(prevpos, pos - prevpos);
|
||||
|
||||
// Add possibly given dynamic index for the root array.
|
||||
if (i == 0 && !dynamicIndex.empty())
|
||||
{
|
||||
oss << dynamicIndex;
|
||||
|
||||
// if we couldn't use original index temp, revert the float4 address calc here
|
||||
if (revertDynamicIndexCalc)
|
||||
{
|
||||
const ShaderVarType* psRootVar = psShaderVar;
|
||||
while (psRootVar->Parent != NULL)
|
||||
psRootVar = psRootVar->Parent;
|
||||
|
||||
uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4
|
||||
oss << " / " << thisSize;
|
||||
}
|
||||
|
||||
if (!indices.empty() && indices[i] != 0)
|
||||
oss << " + " << indices[i];
|
||||
}
|
||||
else if (i < indices.size())
|
||||
oss << indices[i];
|
||||
|
||||
prevpos = pos;
|
||||
i++;
|
||||
pos = psShaderVar->fullName.find('[', prevpos);
|
||||
}
|
||||
oss << psShaderVar->fullName.substr(prevpos);
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
// Maps a resource type to the resource group it is bound through.
// Texture buffers and unrecognised types trigger an assert; they then fall back
// to RGROUP_TEXTURE and RGROUP_CBUFFER respectively.
ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
{
    if (eType == RTYPE_CBUFFER)
        return RGROUP_CBUFFER;

    if (eType == RTYPE_SAMPLER)
        return RGROUP_SAMPLER;

    if (eType == RTYPE_TEXTURE ||
        eType == RTYPE_BYTEADDRESS ||
        eType == RTYPE_STRUCTURED)
        return RGROUP_TEXTURE;

    if (eType == RTYPE_UAV_RWTYPED ||
        eType == RTYPE_UAV_RWSTRUCTURED ||
        eType == RTYPE_UAV_RWBYTEADDRESS ||
        eType == RTYPE_UAV_APPEND_STRUCTURED ||
        eType == RTYPE_UAV_CONSUME_STRUCTURED ||
        eType == RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER)
        return RGROUP_UAV;

    if (eType == RTYPE_TBUFFER)
    {
        ASSERT(0); // Need to find out which group this belongs to
        return RGROUP_TEXTURE;
    }

    // Unknown resource type.
    ASSERT(0);
    return RGROUP_CBUFFER;
}
|
||||
|
||||
// Recovers the texture name embedded in a DX11-style sampler name.
//
// samplerIn must start with "sampler". The remainder is scanned for known
// sampler-state tokens (filter, wrap and compare keywords); the first "word"
// that is not such a token is taken as the texture name and returned with its
// original casing. An empty string is returned when only tokens follow the
// prefix.
//
// There are no hard rules about how sampler names are structured, and the
// texture name may even be missing (that case is handled by the caller).
// C-style naming is supported: underscores act as separators, and because
// texture names commonly start with an underscore, underscores bind "to the
// right" (leading ones belong to the texture name, trailing ones in front of a
// following token are dropped).
static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn)
{
    ASSERT(samplerIn.compare(0, 7, "sampler") == 0);

    // Sampler state keywords are case insensitive. Matching could be done with
    // strncasecmp/_strnicmp, but Windows has no case-insensitive substring
    // search (strcasestr), so work on a lowercase copy instead.
    std::string sampler = samplerIn;
    for (size_t c = 0; c < sampler.size(); ++c)
    {
        // std::tolower is only defined for values representable as unsigned char (or EOF).
        sampler[c] = static_cast<char>(std::tolower(static_cast<unsigned char>(sampler[c])));
    }

    static const char* const kTokens[] = {
        "compare",
        "point", "trilinear", "linear",
        "clamp", "clampu", "clampv", "clampw",
        "repeat", "repeatu", "repeatv", "repeatw",
        "mirror", "mirroru", "mirrorv", "mirrorw",
        "mirroronce", "mirroronceu", "mirroroncev", "mirroroncew",
    };
    const size_t kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);

    const size_t length = sampler.length();
    size_t texNameStart = 7;
    while (texNameStart < length)
    {
        // Skip underscores and find the potential beginning of a token.
        size_t tokenStart = texNameStart;
        while (tokenStart < length && sampler[tokenStart] == '_')
            ++tokenStart;

        // Take the LONGEST token that matches here. Several tokens are prefixes
        // of others ("clamp"/"clampu", "mirror"/"mirroronce"); stopping at the
        // first match would leave the suffix ("u", "once") behind and mistake it
        // for the texture name.
        size_t matchedLen = 0;
        for (size_t t = 0; t < kTokenCount; ++t)
        {
            const size_t tokenLen = strlen(kTokens[t]);
            if (tokenLen > matchedLen && sampler.compare(tokenStart, tokenLen, kTokens[t]) == 0)
                matchedLen = tokenLen;
        }

        if (matchedLen != 0)
        {
            // Known token: continue scanning right after it.
            texNameStart = tokenStart + matchedLen;
            continue;
        }

        // Not a token, so this is where the texture name begins. It extends up
        // to the next token occurrence, or to the end of the string.
        size_t nextTokenStart = length;
        for (size_t t = 0; t < kTokenCount; ++t)
        {
            const size_t pos = sampler.find(kTokens[t], tokenStart);
            if (pos != std::string::npos && pos < nextTokenStart)
                nextTokenStart = pos;
        }

        // Drop underscores that merely separate the name from the following
        // token, but only if an actual token was found (not the end of string).
        if (nextTokenStart < length)
        {
            while (nextTokenStart > tokenStart && sampler[nextTokenStart - 1] == '_')
                --nextTokenStart;
        }

        // Cut from the original string to preserve the caller's casing.
        return samplerIn.substr(texNameStart, nextTokenStart - texNameStart);
    }

    // Only tokens followed the prefix: the texture name is missing.
    return std::string();
}
|
||||
|
||||
// note that we dont have the means right now to have unit tests in hlslcc, so we do poor man testing below
|
||||
// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test
|
||||
static inline void Test_GetTextureNameFromSamplerName()
|
||||
{
|
||||
#define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t))
|
||||
|
||||
CHECK("sampler_point_clamp", "");
|
||||
CHECK("sampler_point_clamp_Tex", "_Tex");
|
||||
CHECK("sampler_point_clamp_Tex__", "_Tex__");
|
||||
CHECK("sampler_______point_Tex", "_Tex");
|
||||
|
||||
CHECK("samplerPointClamp", "");
|
||||
CHECK("samplerPointClamp_Tex", "_Tex");
|
||||
CHECK("samplerPointClamp_Tex__", "_Tex__");
|
||||
|
||||
CHECK("samplerPointTexClamp", "Tex");
|
||||
CHECK("samplerPoint_TexClamp", "_Tex");
|
||||
CHECK("samplerPoint_Tex_Clamp", "_Tex");
|
||||
|
||||
#undef CHECK
|
||||
}
|
||||
|
||||
// Copies precisions from `info` (looked up by resource name) onto the sampler,
// texture and typed-UAV resource bindings. Bindings without a matching entry
// keep their current precision.
void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
{
    if (info.empty())
        return;

#if _DEBUG && 0
    Test_GetTextureNameFromSamplerName();
#endif

    const size_t bindingCount = psResourceBindings.size();
    for (size_t b = 0; b < bindingCount; ++b)
    {
        ResourceBinding &binding = psResourceBindings[b];

        const bool takesPrecision = binding.eType == RTYPE_SAMPLER ||
            binding.eType == RTYPE_TEXTURE ||
            binding.eType == RTYPE_UAV_RWTYPED;
        if (!takesPrecision)
            continue;

        // Try finding the exact match first.
        HLSLccSamplerPrecisionInfo::iterator match = info.find(binding.name);

        // No exact match: if the name carries the "sampler" prefix (DX11 style
        // sampler), try to recover the texture name from the sampler name.
        if (match == info.end() && binding.name.compare(0, 7, "sampler") == 0)
            match = info.find(GetTextureNameFromSamplerName(binding.name));

        // If the respective texture was not found, nothing can be said about the
        // sampler precision. It will currently become "unknown", resulting in
        // half format, even if the texture sampled with it is explicitly marked
        // as float.
        // TODO: should we somehow allow overriding it?
        if (match == info.end())
            continue;

        binding.ePrecision = match->second;
    }
}
|
814
third_party/HLSLcc/src/UseDefineChains.cpp
vendored
Normal file
814
third_party/HLSLcc/src/UseDefineChains.cpp
vendored
Normal file
@ -0,0 +1,814 @@
|
||||
#include "internal_includes/UseDefineChains.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
using HLSLcc::ForEachOperand;
|
||||
|
||||
#define DEBUG_UDCHAINS 0
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
// Debug mode
|
||||
static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[idx].begin();
|
||||
UseDefineChain::iterator ud = psUDChains[idx].begin();
|
||||
while (du != psDUChains[idx].end())
|
||||
{
|
||||
ASSERT(du->index == idx % 4);
|
||||
// Check that the definition actually writes to idx
|
||||
{
|
||||
uint32_t tempReg = idx / 4;
|
||||
uint32_t offs = idx - (tempReg * 4);
|
||||
uint32_t accessMask = 1 << offs;
|
||||
uint32_t i;
|
||||
int found = 0;
|
||||
for (i = 0; i < du->psInst->ui32FirstSrc; i++)
|
||||
{
|
||||
if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP)
|
||||
{
|
||||
if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg)
|
||||
{
|
||||
uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]);
|
||||
if (writeMask & accessMask)
|
||||
{
|
||||
ASSERT(writeMask == du->writeMask);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ASSERT(found);
|
||||
}
|
||||
|
||||
// Check that each usage of each definition also is found in the use-define chain
|
||||
UsageSet::iterator ul = du->usages.begin();
|
||||
while (ul != du->usages.end())
|
||||
{
|
||||
// Search for the usage in the chain
|
||||
UseDefineChain::iterator use = ud;
|
||||
while (use != psUDChains[idx].end() && &*use != *ul)
|
||||
use++;
|
||||
ASSERT(use != psUDChains[idx].end());
|
||||
ASSERT(&*use == *ul);
|
||||
|
||||
// Check that the mapping back is also found
|
||||
ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end());
|
||||
|
||||
ul++;
|
||||
}
|
||||
|
||||
du++;
|
||||
}
|
||||
}
|
||||
|
||||
static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[idx].begin();
|
||||
UseDefineChain::iterator ud = psUDChains[idx].begin();
|
||||
while (ud != psUDChains[idx].end())
|
||||
{
|
||||
// Check that each definition of each usage also is found in the define-use chain
|
||||
DefineSet::iterator dl = ud->defines.begin();
|
||||
ASSERT(ud->psOp->ui32RegisterNumber == idx / 4);
|
||||
ASSERT(ud->index == idx % 4);
|
||||
while (dl != ud->defines.end())
|
||||
{
|
||||
// Search for the definition in the chain
|
||||
DefineUseChain::iterator def = du;
|
||||
while (def != psDUChains[idx].end() && &*def != *dl)
|
||||
def++;
|
||||
ASSERT(def != psDUChains[idx].end());
|
||||
ASSERT(&*def == *dl);
|
||||
|
||||
// Check that the mapping back is also found
|
||||
ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end());
|
||||
|
||||
dl++;
|
||||
}
|
||||
ud++;
|
||||
}
|
||||
}
|
||||
|
||||
static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < tempRegs * 4; i++)
|
||||
{
|
||||
UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions);
|
||||
UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions);
|
||||
}
|
||||
}
|
||||
|
||||
#define printf_console printf
|
||||
|
||||
#endif
|
||||
|
||||
using namespace HLSLcc::ControlFlow;
|
||||
using std::for_each;
|
||||
|
||||
// Returns the chain entry describing `def` (same instruction and operand),
// creating it at the front of psDUChain when it does not exist yet.
// `index` is the register component this chain belongs to; a new entry records
// it and registers itself as its own sibling for that component.
static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index)
{
    // Look for an entry that already describes this definition.
    for (DefineUseChain::iterator existing = psDUChain.begin(); existing != psDUChain.end(); ++existing)
    {
        if (existing->psInst == def.m_Instruction && existing->psOp == def.m_Operand)
            return &(*existing);
    }

    // None yet: prepend a fresh entry and fill it in.
    psDUChain.push_front(DefineUseChainEntry());
    DefineUseChainEntry *created = &(*psDUChain.begin());

    created->psInst = (Instruction *)def.m_Instruction;
    created->psOp = (Operand *)def.m_Operand;
    created->index = index;
    created->writeMask = def.m_Operand->GetAccessMask();
    created->psSiblings[index] = created;

    return created;
}
|
||||
|
||||
// Do flow control analysis on the instructions and build the define-use and use-define chains
|
||||
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg)
|
||||
{
|
||||
ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp
|
||||
|
||||
psDUChain.clear();
|
||||
psUDChain.clear();
|
||||
|
||||
for (uint32_t i = 0; i < ui32NumTemps * 4; i++)
|
||||
{
|
||||
psUDChain.insert(std::make_pair(i, UseDefineChain()));
|
||||
psDUChain.insert(std::make_pair(i, DefineUseChain()));
|
||||
}
|
||||
|
||||
const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks();
|
||||
|
||||
// Loop through each block, first calculate the union of all the reachables of all preceding blocks
|
||||
// and then build on that as we go along the basic block instructions
|
||||
for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr<BasicBlock> &bptr)
|
||||
{
|
||||
const BasicBlock &b = *bptr.get();
|
||||
BasicBlock::ReachableVariables rvars;
|
||||
for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock)
|
||||
{
|
||||
const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock);
|
||||
BasicBlock::RVarUnion(rvars, b.Reachable());
|
||||
});
|
||||
|
||||
// Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions
|
||||
for (const Instruction *inst = b.First(); inst <= b.Last(); inst++)
|
||||
{
|
||||
// Process sources first
|
||||
ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
|
||||
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Add an use for all visible definitions
|
||||
psUDChain[regIdx].push_front(UseDefineChainEntry());
|
||||
UseDefineChainEntry &ue = *psUDChain[regIdx].begin();
|
||||
ue.psInst = (Instruction *)psInst;
|
||||
ue.psOp = (Operand *)psOperand;
|
||||
ue.accessMask = accessMask;
|
||||
ue.index = k;
|
||||
ue.psSiblings[k] = &ue;
|
||||
// ue.siblings will be filled out later.
|
||||
|
||||
BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx];
|
||||
for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def)
|
||||
{
|
||||
DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k);
|
||||
ue.defines.insert(duentry);
|
||||
duentry->usages.insert(&ue);
|
||||
});
|
||||
}
|
||||
return;
|
||||
});
|
||||
|
||||
// Then the destination operands
|
||||
ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND,
|
||||
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
|
||||
{
|
||||
if (psOperand->eType != OPERAND_TYPE_TEMP)
|
||||
return;
|
||||
|
||||
uint32_t tempReg = psOperand->ui32RegisterNumber;
|
||||
uint32_t accessMask = psOperand->GetAccessMask();
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
if (!(accessMask & (1 << k)))
|
||||
continue;
|
||||
|
||||
uint32_t regIdx = tempReg * 4 + k;
|
||||
|
||||
// Overwrite whatever's in rvars; they are killed by this
|
||||
rvars[regIdx].clear();
|
||||
rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand));
|
||||
|
||||
// Make sure the definition gets created even though it doesn't have any uses at all
|
||||
// (happens when sampling a texture but not all channels are used etc).
|
||||
GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k);
|
||||
}
|
||||
return;
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Connect the siblings for all uses and definitions
|
||||
for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair<const uint32_t, UseDefineChain> &udpair)
|
||||
{
|
||||
UseDefineChain &ud = udpair.second;
|
||||
// Clear out the bottom 2 bits to get the actual base reg
|
||||
uint32_t baseReg = udpair.first & ~(3);
|
||||
|
||||
for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue)
|
||||
{
|
||||
ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber);
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
// Skip components that we don't access, or the one that's our own
|
||||
if (!(ue.accessMask & (1 << k)) || ue.index == k)
|
||||
continue;
|
||||
|
||||
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
|
||||
UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; });
|
||||
ASSERT(siblItr != psUDChain[baseReg + k].end());
|
||||
UseDefineChainEntry &sibling = *siblItr;
|
||||
ue.psSiblings[k] = &sibling;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Same for definitions
|
||||
for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair<const uint32_t, DefineUseChain> &dupair)
|
||||
{
|
||||
DefineUseChain &du = dupair.second;
|
||||
// Clear out the bottom 2 bits to get the actual base reg
|
||||
uint32_t baseReg = dupair.first & ~(3);
|
||||
|
||||
for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de)
|
||||
{
|
||||
ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber);
|
||||
|
||||
// Go through each component
|
||||
for (int k = 0; k < 4; k++)
|
||||
{
|
||||
// Skip components that we don't access, or the one that's our own
|
||||
if (!(de.writeMask & (1 << k)) || de.index == k)
|
||||
continue;
|
||||
|
||||
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
|
||||
DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; });
|
||||
ASSERT(siblItr != psDUChain[baseReg + k].end());
|
||||
DefineUseChainEntry &sibling = *siblItr;
|
||||
de.psSiblings[k] = &sibling;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions);
|
||||
#endif
|
||||
}
|
||||
|
||||
typedef std::vector<DefineUseChainEntry *> SplitDefinitions;
|
||||
|
||||
// Split out a define to use a new temp register
|
||||
static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
uint32_t newReg = *psNumTemps;
|
||||
uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber;
|
||||
uint32_t accessMask = defs[0]->writeMask;
|
||||
uint32_t i, u32def;
|
||||
uint32_t rebase, count;
|
||||
uint32_t splitTableValue;
|
||||
|
||||
ASSERT(defs.size() > 0);
|
||||
for (i = 1; i < defs.size(); i++)
|
||||
{
|
||||
ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg);
|
||||
accessMask |= defs[i]->writeMask;
|
||||
}
|
||||
|
||||
|
||||
(*psNumTemps)++;
|
||||
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions());
|
||||
#endif
|
||||
ASSERT(accessMask != 0 && accessMask <= 0xf);
|
||||
// Calculate rebase value and component count
|
||||
rebase = 0;
|
||||
count = 0;
|
||||
i = accessMask;
|
||||
while ((i & 1) == 0)
|
||||
{
|
||||
rebase++;
|
||||
i = i >> 1;
|
||||
}
|
||||
while (i != 0)
|
||||
{
|
||||
count++;
|
||||
i = i >> 1;
|
||||
}
|
||||
|
||||
// Make sure there's enough room in the split table
|
||||
if (pui32SplitTable.size() <= newReg)
|
||||
{
|
||||
size_t newSize = pui32SplitTable.size() * 2;
|
||||
pui32SplitTable.resize(newSize, 0xffffffff);
|
||||
}
|
||||
|
||||
// Set the original temp of the new register
|
||||
{
|
||||
uint32_t origTemp = oldReg;
|
||||
while (pui32SplitTable[origTemp] != 0xffffffff)
|
||||
origTemp = pui32SplitTable[origTemp] & 0xffff;
|
||||
|
||||
ASSERT(rebase < 4);
|
||||
ASSERT(count <= 4);
|
||||
splitTableValue = (count << 24) | (rebase << 16) | origTemp;
|
||||
|
||||
pui32SplitTable[newReg] = splitTableValue;
|
||||
}
|
||||
|
||||
// Insert the new temps to the map
|
||||
for (i = newReg * 4; i < newReg * 4 + 4; i++)
|
||||
{
|
||||
psUDChains.insert(std::make_pair(i, UseDefineChain()));
|
||||
psDUChains.insert(std::make_pair(i, DefineUseChain()));
|
||||
}
|
||||
|
||||
for (u32def = 0; u32def < defs.size(); u32def++)
|
||||
{
|
||||
DefineUseChainEntry *defineToSplit = defs[u32def];
|
||||
uint32_t oldIdx = defineToSplit->index;
|
||||
#if DEBUG_UDCHAINS
|
||||
printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count);
|
||||
#endif
|
||||
|
||||
// We may have moved the opcodes already because of multiple defines pointing to the same op
|
||||
if (defineToSplit->psOp->ui32RegisterNumber != newReg)
|
||||
{
|
||||
ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg);
|
||||
// Update the declaration operand
|
||||
// Don't change possible suboperands as they are sources
|
||||
defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
|
||||
}
|
||||
|
||||
defineToSplit->writeMask >>= rebase;
|
||||
defineToSplit->index -= rebase;
|
||||
// Change the temp register number for all usages
|
||||
UsageSet::iterator ul = defineToSplit->usages.begin();
|
||||
while (ul != defineToSplit->usages.end())
|
||||
{
|
||||
// Already updated by one of the siblings? Skip.
|
||||
if ((*ul)->psOp->ui32RegisterNumber != newReg)
|
||||
{
|
||||
ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg);
|
||||
(*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
|
||||
}
|
||||
|
||||
// Update the UD chain
|
||||
{
|
||||
UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin();
|
||||
while (udLoc != psUDChains[oldReg * 4 + oldIdx].end())
|
||||
{
|
||||
if (&*udLoc == *ul)
|
||||
{
|
||||
// Move to new list
|
||||
psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc);
|
||||
|
||||
if (rebase > 0)
|
||||
{
|
||||
(*ul)->accessMask >>= rebase;
|
||||
(*ul)->index -= rebase;
|
||||
memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *));
|
||||
}
|
||||
break;
|
||||
}
|
||||
udLoc++;
|
||||
}
|
||||
}
|
||||
|
||||
ul++;
|
||||
}
|
||||
|
||||
// Move the define out of the old chain (if its still there)
|
||||
{
|
||||
// Find the define in the old chain
|
||||
DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin();
|
||||
while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit))
|
||||
{
|
||||
duLoc++;
|
||||
}
|
||||
ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end());
|
||||
{
|
||||
// Move directly to new chain
|
||||
psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc);
|
||||
if (rebase != 0)
|
||||
{
|
||||
memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUG_UDCHAINS
|
||||
UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Adds a define and all its siblings to the list, checking duplicates
|
||||
static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef)
|
||||
{
|
||||
uint32_t k;
|
||||
for (k = 0; k < 4; k++)
|
||||
{
|
||||
if (newDef->psSiblings[k])
|
||||
{
|
||||
DefineUseChainEntry *defToAdd = newDef->psSiblings[k];
|
||||
uint32_t m;
|
||||
int defFound = 0;
|
||||
for (m = 0; m < defs.size(); m++)
|
||||
{
|
||||
if (defs[m] == defToAdd)
|
||||
{
|
||||
defFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defFound == 0)
|
||||
{
|
||||
defs.push_back(newDef->psSiblings[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place
|
||||
static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
uint32_t reg;
|
||||
uint32_t combinedMask;
|
||||
uint32_t i, k, u32def;
|
||||
int canSplit = 1;
|
||||
DefineUseChain::iterator du;
|
||||
int hasLeftoverDefinitions = 0;
|
||||
// Initial checks: all definitions must:
|
||||
// Access the same register
|
||||
// Have at least one definition in any of the 4 register slots that isn't included
|
||||
if (defs.empty())
|
||||
return 0;
|
||||
|
||||
reg = defs[0]->psOp->ui32RegisterNumber;
|
||||
combinedMask = defs[0]->writeMask;
|
||||
for (i = 1; i < defs.size(); i++)
|
||||
{
|
||||
if (reg != defs[i]->psOp->ui32RegisterNumber)
|
||||
return 0;
|
||||
|
||||
combinedMask |= defs[i]->writeMask;
|
||||
}
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
du = psDUChains[reg * 4 + i].begin();
|
||||
while (du != psDUChains[reg * 4 + i].end())
|
||||
{
|
||||
int defFound = 0;
|
||||
for (k = 0; k < defs.size(); k++)
|
||||
{
|
||||
if (&*du == defs[k])
|
||||
{
|
||||
defFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defFound == 0)
|
||||
{
|
||||
hasLeftoverDefinitions = 1;
|
||||
break;
|
||||
}
|
||||
du++;
|
||||
}
|
||||
if (hasLeftoverDefinitions)
|
||||
break;
|
||||
}
|
||||
// We'd be splitting the entire register and all its definitions, no point in that.
|
||||
if (hasLeftoverDefinitions == 0)
|
||||
return 0;
|
||||
|
||||
// Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array.
|
||||
for (u32def = 0; u32def < defs.size(); u32def++)
|
||||
{
|
||||
DefineUseChainEntry *def = defs[u32def];
|
||||
|
||||
UsageSet::iterator ul = def->usages.begin();
|
||||
while (ul != def->usages.end())
|
||||
{
|
||||
uint32_t j;
|
||||
|
||||
// Check that we only read a subset of the combined writemask
|
||||
if (((*ul)->accessMask & (~combinedMask)) != 0)
|
||||
{
|
||||
// Do an additional attempt, pick up all the sibling definitions as well
|
||||
// Only do this if we have the space in the definitions table
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (((*ul)->accessMask & (1 << j)) == 0)
|
||||
continue;
|
||||
AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin());
|
||||
}
|
||||
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
}
|
||||
|
||||
// It must have at least one declaration
|
||||
ASSERT(!(*ul)->defines.empty());
|
||||
|
||||
// Check that all siblings for the usage use one of the definitions
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
uint32_t m;
|
||||
int defineFound = 0;
|
||||
if (((*ul)->accessMask & (1 << j)) == 0)
|
||||
continue;
|
||||
|
||||
ASSERT((*ul)->psSiblings[j] != NULL);
|
||||
ASSERT(!(*ul)->psSiblings[j]->defines.empty());
|
||||
|
||||
// Check that all definitions for this usage are found from the definitions table
|
||||
DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin();
|
||||
while (dl != (*ul)->psSiblings[j]->defines.end())
|
||||
{
|
||||
defineFound = 0;
|
||||
for (m = 0; m < defs.size(); m++)
|
||||
{
|
||||
if (*dl == defs[m])
|
||||
{
|
||||
defineFound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (defineFound == 0)
|
||||
{
|
||||
// Add this define and all its siblings to the table and try again
|
||||
AddDefineToList(defs, *dl);
|
||||
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
}
|
||||
|
||||
dl++;
|
||||
}
|
||||
|
||||
if (defineFound == 0)
|
||||
{
|
||||
canSplit = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (canSplit == 0)
|
||||
break;
|
||||
|
||||
// This'll do, check next usage
|
||||
ul++;
|
||||
}
|
||||
if (canSplit == 0)
|
||||
break;
|
||||
}
|
||||
if (canSplit)
|
||||
{
|
||||
UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Do temp splitting based on use-define chains
|
||||
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
|
||||
{
|
||||
// Algorithm overview:
|
||||
// Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable),
|
||||
// split it out.
|
||||
uint32_t i;
|
||||
uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition
|
||||
for (i = 0; i < tempsAtStart * 4; i++)
|
||||
{
|
||||
// No definitions?
|
||||
if (psDUChains[i].empty())
|
||||
continue;
|
||||
|
||||
DefineUseChain::iterator du = psDUChains[i].begin();
|
||||
// Ok we have multiple definitions for a temp, check them through
|
||||
while (du != psDUChains[i].end())
|
||||
{
|
||||
SplitDefinitions sd;
|
||||
AddDefineToList(sd, &*du);
|
||||
du++;
|
||||
// If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain
|
||||
if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable))
|
||||
{
|
||||
du = psDUChains[i].begin();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if all the usages of this definitions are instructions that deal with floating point data
|
||||
static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
|
||||
{
|
||||
UsageSet::iterator itr = du->usages.begin();
|
||||
for (; itr != du->usages.end(); itr++)
|
||||
{
|
||||
Instruction *psInst = (*itr)->psInst;
|
||||
|
||||
if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
|
||||
return false;
|
||||
|
||||
switch (psInst->eOpcode)
|
||||
{
|
||||
case OPCODE_ADD:
|
||||
case OPCODE_MUL:
|
||||
case OPCODE_MOV:
|
||||
case OPCODE_MAD:
|
||||
case OPCODE_DIV:
|
||||
case OPCODE_LOG:
|
||||
case OPCODE_EXP:
|
||||
case OPCODE_MAX:
|
||||
case OPCODE_MIN:
|
||||
case OPCODE_DP2:
|
||||
case OPCODE_DP2ADD:
|
||||
case OPCODE_DP3:
|
||||
case OPCODE_DP4:
|
||||
case OPCODE_RSQ:
|
||||
case OPCODE_SQRT:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Based on the sampler precisions, downgrade the definitions if possible.
|
||||
void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps)
|
||||
{
|
||||
uint32_t madeProgress = 0;
|
||||
do
|
||||
{
|
||||
uint32_t i;
|
||||
madeProgress = 0;
|
||||
for (i = 0; i < ui32NumTemps * 4; i++)
|
||||
{
|
||||
DefineUseChain::iterator du = psDUChains[i].begin();
|
||||
while (du != psDUChains[i].end())
|
||||
{
|
||||
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
|
||||
if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
|
||||
&& du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP
|
||||
&& du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT
|
||||
&& du->isStandalone
|
||||
&& HasOnlyFloatUsages(du))
|
||||
{
|
||||
uint32_t sibl;
|
||||
// Ok we can change the precision.
|
||||
ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP);
|
||||
ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT);
|
||||
du->psOp->eMinPrecision = sType;
|
||||
|
||||
// Update all the uses of all the siblings
|
||||
for (sibl = 0; sibl < 4; sibl++)
|
||||
{
|
||||
if (!du->psSiblings[sibl])
|
||||
continue;
|
||||
|
||||
UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
|
||||
while (ul != du->psSiblings[sibl]->usages.end())
|
||||
{
|
||||
ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT ||
|
||||
(*ul)->psOp->eMinPrecision == sType);
|
||||
// We may well write this multiple times to the same op but that's fine.
|
||||
(*ul)->psOp->eMinPrecision = sType;
|
||||
|
||||
ul++;
|
||||
}
|
||||
}
|
||||
madeProgress = 1;
|
||||
}
|
||||
du++;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (madeProgress != 0);
|
||||
}
|
||||
|
||||
// Marks definitions as standalone.
//
// A definition qualifies when, for every usage of each of its siblings, every sibling
// of that usage sees at most one definition and that definition is the matching
// sibling of this definition. On success isStandalone is set to 1 on all siblings of
// the definition. Definitions that are already flagged are skipped.
//
// psDUChains   - define-use chains, indexed by slot (four slots per temp).
// ui32NumTemps - number of temps to examine.
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
    for (uint32_t slot = 0; slot < ui32NumTemps * 4; ++slot)
    {
        for (DefineUseChain::iterator def = psDUChains[slot].begin(); def != psDUChains[slot].end(); ++def)
        {
            // Already flagged
            if (def->isStandalone)
                continue;

            // Cleared on the first counter-example; every loop below stops once it is false.
            bool standalone = true;

            for (uint32_t s = 0; standalone && s < 4; ++s)
            {
                if (!def->psSiblings[s])
                    continue;

                UsageSet::iterator use = def->psSiblings[s]->usages.begin();
                while (standalone && use != def->psSiblings[s]->usages.end())
                {
                    ASSERT(!(*use)->defines.empty());

                    // All the siblings of this usage must only see this definition's corresponding sibling
                    for (uint32_t k = 0; standalone && k < 4; ++k)
                    {
                        if (!(*use)->psSiblings[k])
                            continue;

                        if ((*use)->psSiblings[k]->defines.size() > 1)
                            standalone = false;
                        else if (*(*use)->psSiblings[k]->defines.begin() != def->psSiblings[k])
                            standalone = false;
                    }

                    if (standalone)
                        ++use;
                }
            }

            if (!standalone)
                continue;

            // Yep, mark the definition and all its siblings
            for (uint32_t s = 0; s < 4; ++s)
            {
                if (def->psSiblings[s])
                    def->psSiblings[s]->isStandalone = 1;
            }
        }
    }
}
|
||||
|
||||
// Write the uses and defines back to Instruction and Operand member lists.
|
||||
void WriteBackUsesAndDefines(DefineUseChains &psDUChains)
|
||||
{
|
||||
using namespace std;
|
||||
// Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them
|
||||
for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr)
|
||||
{
|
||||
const DefineUseChain &duChain = itr.second;
|
||||
for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du)
|
||||
{
|
||||
for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage)
|
||||
{
|
||||
// Update instruction use list
|
||||
du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp));
|
||||
// And the usage's definition
|
||||
usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
87
third_party/HLSLcc/src/cbstring/bsafe.c
vendored
Normal file
87
third_party/HLSLcc/src/cbstring/bsafe.c
vendored
Normal file
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bsafe.c
|
||||
*
|
||||
* This is an optional module that can be used to help enforce a safety
|
||||
* standard based on pervasive usage of bstrlib. This file is not necessarily
|
||||
* portable, however, it has been tested to work correctly with Intel's C/C++
|
||||
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "bsafe.h"
|
||||
|
||||
#if 0
|
||||
static int bsafeShouldExit = 1;
|
||||
|
||||
char * strcpy(char *dst, const char *src);
|
||||
char * strcat(char *dst, const char *src);
|
||||
|
||||
char * strcpy(char *dst, const char *src)
|
||||
{
|
||||
dst = dst;
|
||||
src = src;
|
||||
fprintf(stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char * strcat(char *dst, const char *src)
|
||||
{
|
||||
dst = dst;
|
||||
src = src;
|
||||
fprintf(stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
|
||||
char * (gets)(char * buf) {
|
||||
buf = buf;
|
||||
fprintf(stderr, "bsafe error: gets() is not safe, use bgets.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
char * (strncpy)(char *dst, const char *src, size_t n) {
|
||||
dst = dst;
|
||||
src = src;
|
||||
n = n;
|
||||
fprintf(stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char * (strncat)(char *dst, const char *src, size_t n) {
|
||||
dst = dst;
|
||||
src = src;
|
||||
n = n;
|
||||
fprintf(stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char * (strtok)(char *s1, const char *s2) {
|
||||
s1 = s1;
|
||||
s2 = s2;
|
||||
fprintf(stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char * (strdup)(const char *s) {
|
||||
s = s;
|
||||
fprintf(stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n");
|
||||
if (bsafeShouldExit) exit(-1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
43
third_party/HLSLcc/src/cbstring/bsafe.h
vendored
Normal file
43
third_party/HLSLcc/src/cbstring/bsafe.h
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bsafe.h
|
||||
*
|
||||
* This is an optional module that can be used to help enforce a safety
|
||||
* standard based on pervasive usage of bstrlib. This file is not necessarily
|
||||
* portable, however, it has been tested to work correctly with Intel's C/C++
|
||||
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
|
||||
*/
|
||||
|
||||
#ifndef BSTRLIB_BSAFE_INCLUDE
|
||||
#define BSTRLIB_BSAFE_INCLUDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
|
||||
/* This is caught in the linker, so it's not necessary for gcc. */
|
||||
extern char * (gets)(char * buf);
|
||||
#endif
|
||||
|
||||
extern char * (strncpy)(char *dst, const char *src, size_t n);
|
||||
extern char * (strncat)(char *dst, const char *src, size_t n);
|
||||
extern char * (strtok)(char *s1, const char *s2);
|
||||
extern char * (strdup)(const char *s);
|
||||
|
||||
#undef strcpy
|
||||
#undef strcat
|
||||
#define strcpy(a, b) bsafe_strcpy(a,b)
|
||||
#define strcat(a, b) bsafe_strcat(a,b)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
1273
third_party/HLSLcc/src/cbstring/bstraux.c
vendored
Normal file
1273
third_party/HLSLcc/src/cbstring/bstraux.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
112
third_party/HLSLcc/src/cbstring/bstraux.h
vendored
Normal file
112
third_party/HLSLcc/src/cbstring/bstraux.h
vendored
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bstraux.h
|
||||
*
|
||||
* This file is not a necessary part of the core bstring library itself, but
|
||||
* is just an auxiliary module which includes miscellaneous or trivial
|
||||
* functions.
|
||||
*/
|
||||
|
||||
#ifndef BSTRAUX_INCLUDE
|
||||
#define BSTRAUX_INCLUDE
|
||||
|
||||
#include <time.h>
|
||||
#include "bstrlib.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Safety mechanisms */
|
||||
#define bstrDeclare(b) bstring (b) = NULL;
|
||||
#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }}
|
||||
|
||||
/* Backward compatibility with previous versions of Bstrlib */
|
||||
#define bAssign(a, b) ((bassign)((a), (b)))
|
||||
#define bSubs(b, pos, len, a, c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
|
||||
#define bStrchr(b, c) ((bstrchr)((b), (c)))
|
||||
#define bStrchrFast(b, c) ((bstrchr)((b), (c)))
|
||||
#define bCatCstr(b, s) ((bcatcstr)((b), (s)))
|
||||
#define bCatBlk(b, s, len) ((bcatblk)((b),(s),(len)))
|
||||
#define bCatStatic(b, s) bCatBlk ((b), ("" s ""), sizeof (s) - 1)
|
||||
#define bTrunc(b, n) ((btrunc)((b), (n)))
|
||||
#define bReplaceAll(b, find, repl, pos) ((bfindreplace)((b),(find),(repl),(pos)))
|
||||
#define bUppercase(b) ((btoupper)(b))
|
||||
#define bLowercase(b) ((btolower)(b))
|
||||
#define bCaselessCmp(a, b) ((bstricmp)((a), (b)))
|
||||
#define bCaselessNCmp(a, b, n) ((bstrnicmp)((a), (b), (n)))
|
||||
#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL))
|
||||
#define bUuDecode(b) (bUuDecodeEx ((b), NULL))
|
||||
|
||||
/* Unusual functions */
|
||||
extern struct bStream * bsFromBstr(const_bstring b);
|
||||
extern bstring bTail(bstring b, int n);
|
||||
extern bstring bHead(bstring b, int n);
|
||||
extern int bSetCstrChar(bstring a, int pos, char c);
|
||||
extern int bSetChar(bstring b, int pos, char c);
|
||||
extern int bFill(bstring a, char c, int len);
|
||||
extern int bReplicate(bstring b, int n);
|
||||
extern int bReverse(bstring b);
|
||||
extern int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill);
|
||||
extern bstring bStrfTime(const char * fmt, const struct tm * timeptr);
|
||||
#define bAscTime(t) (bStrfTime ("%c\n", (t)))
|
||||
#define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL)
|
||||
|
||||
/* Spacing formatting */
|
||||
extern int bJustifyLeft(bstring b, int space);
|
||||
extern int bJustifyRight(bstring b, int width, int space);
|
||||
extern int bJustifyMargin(bstring b, int width, int space);
|
||||
extern int bJustifyCenter(bstring b, int width, int space);
|
||||
|
||||
/* Esoteric standards specific functions */
|
||||
extern char * bStr2NetStr(const_bstring b);
|
||||
extern bstring bNetStr2Bstr(const char * buf);
|
||||
extern bstring bBase64Encode(const_bstring b);
|
||||
extern bstring bBase64DecodeEx(const_bstring b, int * boolTruncError);
|
||||
extern struct bStream * bsUuDecode(struct bStream * sInp, int * badlines);
|
||||
extern bstring bUuDecodeEx(const_bstring src, int * badlines);
|
||||
extern bstring bUuEncode(const_bstring src);
|
||||
extern bstring bYEncode(const_bstring src);
|
||||
extern bstring bYDecode(const_bstring src);
|
||||
|
||||
/* Writable stream */
|
||||
typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
|
||||
|
||||
struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm);
|
||||
int bwsWriteBstr(struct bwriteStream * stream, const_bstring b);
|
||||
int bwsWriteBlk(struct bwriteStream * stream, void * blk, int len);
|
||||
int bwsWriteFlush(struct bwriteStream * stream);
|
||||
int bwsIsEOF(const struct bwriteStream * stream);
|
||||
int bwsBuffLength(struct bwriteStream * stream, int sz);
|
||||
void * bwsClose(struct bwriteStream * stream);
|
||||
|
||||
/* Security functions */
|
||||
/*
 * bSecureDestroy(b): overwrite the whole allocated buffer of b (mlen bytes) with
 * zeros and then pass b to bdestroy(). Nothing is done when b is NULL, when its
 * data pointer is NULL, or when mlen <= 0. The argument is evaluated exactly once.
 */
#define bSecureDestroy(b) { \
    bstring bstr__tmp = (b); \
    if (bstr__tmp != NULL) { \
        if (bstr__tmp->mlen > 0 && bstr__tmp->data != NULL) { \
            (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \
            bdestroy (bstr__tmp); \
        } \
    } \
}
|
||||
/*
 * bSecureWriteProtect(t): write protect the struct tagbstring t (passed as an
 * lvalue, not a pointer) after wiping its unused capacity.
 *
 * When t.mlen >= 0, the bytes between the end of the string (data + slen) and the
 * end of the allocation (data + mlen) are zeroed, then mlen is set to -1. A t whose
 * mlen is already negative is left untouched. t is evaluated more than once, so it
 * must not have side effects.
 *
 * Fix: the inner condition used to read "if ((t).mlen > (t).slen)) {" with an
 * unbalanced closing parenthesis, so every expansion failed to compile.
 */
#define bSecureWriteProtect(t) { \
    if ((t).mlen >= 0) { \
        if ((t).mlen > (t).slen) { \
            (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
        } \
        (t).mlen = -1; \
    } \
}
|
||||
extern bstring bSecureInput(int maxlen, int termchar,
|
||||
bNgetc vgetchar, void * vgcCtx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
3280
third_party/HLSLcc/src/cbstring/bstrlib.c
vendored
Normal file
3280
third_party/HLSLcc/src/cbstring/bstrlib.c
vendored
Normal file
File diff suppressed because it is too large
Load Diff
306
third_party/HLSLcc/src/cbstring/bstrlib.h
vendored
Normal file
306
third_party/HLSLcc/src/cbstring/bstrlib.h
vendored
Normal file
@ -0,0 +1,306 @@
|
||||
/*
|
||||
* This source file is part of the bstring string library. This code was
|
||||
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
|
||||
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
|
||||
* for details on usage and license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* bstrlib.h
|
||||
*
|
||||
* This file is the header file for the core module for implementing the
|
||||
* bstring functions.
|
||||
*/
|
||||
|
||||
#ifndef BSTRLIB_INCLUDE
|
||||
#define BSTRLIB_INCLUDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#if !defined(BSTRLIB_VSNP_OK) && !defined(BSTRLIB_NOVSNP)
|
||||
# if defined(__TURBOC__) && !defined(__BORLANDC__)
|
||||
# define BSTRLIB_NOVSNP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define BSTR_ERR (-1)
|
||||
#define BSTR_OK (0)
|
||||
#define BSTR_BS_BUFF_LENGTH_GET (0)
|
||||
|
||||
typedef struct tagbstring * bstring;
|
||||
typedef const struct tagbstring * const_bstring;
|
||||
|
||||
/* Copy functions */
|
||||
#define cstr2bstr bfromcstr
|
||||
extern bstring bfromcstr(const char * str);
|
||||
extern bstring bfromcstralloc(int mlen, const char * str);
|
||||
extern bstring blk2bstr(const void * blk, int len);
|
||||
extern char * bstr2cstr(const_bstring s, char z);
|
||||
extern int bcstrfree(char * s);
|
||||
extern bstring bstrcpy(const_bstring b1);
|
||||
extern int bassign(bstring a, const_bstring b);
|
||||
extern int bassignmidstr(bstring a, const_bstring b, int left, int len);
|
||||
extern int bassigncstr(bstring a, const char * str);
|
||||
extern int bassignblk(bstring a, const void * s, int len);
|
||||
|
||||
/* Destroy function */
|
||||
extern int bdestroy(bstring b);
|
||||
|
||||
/* Space allocation hinting functions */
|
||||
extern int balloc(bstring s, int len);
|
||||
extern int ballocmin(bstring b, int len);
|
||||
|
||||
/* Substring extraction */
|
||||
extern bstring bmidstr(const_bstring b, int left, int len);
|
||||
|
||||
/* Various standard manipulations */
|
||||
extern int bconcat(bstring b0, const_bstring b1);
|
||||
extern int bconchar(bstring b0, char c);
|
||||
extern int bcatcstr(bstring b, const char * s);
|
||||
extern int bcatblk(bstring b, const void * s, int len);
|
||||
extern int binsert(bstring s1, int pos, const_bstring s2, unsigned char fill);
|
||||
extern int binsertch(bstring s1, int pos, int len, unsigned char fill);
|
||||
extern int breplace(bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
|
||||
extern int bdelete(bstring s1, int pos, int len);
|
||||
extern int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill);
|
||||
extern int btrunc(bstring b, int n);
|
||||
|
||||
/* Scan/search functions */
|
||||
extern int bstricmp(const_bstring b0, const_bstring b1);
|
||||
extern int bstrnicmp(const_bstring b0, const_bstring b1, int n);
|
||||
extern int biseqcaseless(const_bstring b0, const_bstring b1);
|
||||
extern int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len);
|
||||
extern int biseq(const_bstring b0, const_bstring b1);
|
||||
extern int bisstemeqblk(const_bstring b0, const void * blk, int len);
|
||||
extern int biseqcstr(const_bstring b, const char * s);
|
||||
extern int biseqcstrcaseless(const_bstring b, const char * s);
|
||||
extern int bstrcmp(const_bstring b0, const_bstring b1);
|
||||
extern int bstrncmp(const_bstring b0, const_bstring b1, int n);
|
||||
extern int binstr(const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrr(const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrcaseless(const_bstring s1, int pos, const_bstring s2);
|
||||
extern int binstrrcaseless(const_bstring s1, int pos, const_bstring s2);
|
||||
extern int bstrchrp(const_bstring b, int c, int pos);
|
||||
extern int bstrrchrp(const_bstring b, int c, int pos);
|
||||
#define bstrchr(b, c) bstrchrp ((b), (c), 0)
|
||||
#define bstrrchr(b, c) bstrrchrp ((b), (c), blength(b)-1)
|
||||
extern int binchr(const_bstring b0, int pos, const_bstring b1);
|
||||
extern int binchrr(const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bninchr(const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bninchrr(const_bstring b0, int pos, const_bstring b1);
|
||||
extern int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos);
|
||||
extern int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos);
|
||||
|
||||
/* List of string container functions */
|
||||
struct bstrList
|
||||
{
|
||||
int qty, mlen;
|
||||
bstring * entry;
|
||||
};
|
||||
extern struct bstrList * bstrListCreate(void);
|
||||
extern int bstrListDestroy(struct bstrList * sl);
|
||||
extern int bstrListAlloc(struct bstrList * sl, int msz);
|
||||
extern int bstrListAllocMin(struct bstrList * sl, int msz);
|
||||
|
||||
/* String split and join functions */
|
||||
extern struct bstrList * bsplit(const_bstring str, unsigned char splitChar);
|
||||
extern struct bstrList * bsplits(const_bstring str, const_bstring splitStr);
|
||||
extern struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr);
|
||||
extern bstring bjoin(const struct bstrList * bl, const_bstring sep);
|
||||
extern int bsplitcb(const_bstring str, unsigned char splitChar, int pos,
|
||||
int (* cb)(void * parm, int ofs, int len), void * parm);
|
||||
extern int bsplitscb(const_bstring str, const_bstring splitStr, int pos,
|
||||
int (* cb)(void * parm, int ofs, int len), void * parm);
|
||||
extern int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos,
|
||||
int (* cb)(void * parm, int ofs, int len), void * parm);
|
||||
|
||||
/* Miscellaneous functions */
|
||||
extern int bpattern(bstring b, int len);
|
||||
extern int btoupper(bstring b);
|
||||
extern int btolower(bstring b);
|
||||
extern int bltrimws(bstring b);
|
||||
extern int brtrimws(bstring b);
|
||||
extern int btrimws(bstring b);
|
||||
|
||||
/* <*>printf format functions */
|
||||
#if !defined(BSTRLIB_NOVSNP)
|
||||
extern bstring bformat(const char * fmt, ...);
|
||||
extern int bformata(bstring b, const char * fmt, ...);
|
||||
extern int bassignformat(bstring b, const char * fmt, ...);
|
||||
extern int bvcformata(bstring b, int count, const char * fmt, va_list arglist);
|
||||
|
||||
#define bvformata(ret, b, fmt, lastarg) { \
|
||||
bstring bstrtmp_b = (b); \
|
||||
const char * bstrtmp_fmt = (fmt); \
|
||||
int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
|
||||
for (;;) { \
|
||||
va_list bstrtmp_arglist; \
|
||||
va_start (bstrtmp_arglist, lastarg); \
|
||||
bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
|
||||
va_end (bstrtmp_arglist); \
|
||||
if (bstrtmp_r >= 0) { /* Everything went ok */ \
|
||||
bstrtmp_r = BSTR_OK; \
|
||||
break; \
|
||||
} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
|
||||
bstrtmp_r = BSTR_ERR; \
|
||||
break; \
|
||||
} \
|
||||
bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
|
||||
} \
|
||||
ret = bstrtmp_r; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
typedef int (*bNgetc) (void *parm);
|
||||
typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
|
||||
|
||||
/* Input functions */
|
||||
extern bstring bgets(bNgetc getcPtr, void * parm, char terminator);
|
||||
extern bstring bread(bNread readPtr, void * parm);
|
||||
extern int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator);
|
||||
extern int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator);
|
||||
extern int breada(bstring b, bNread readPtr, void * parm);
|
||||
|
||||
/* Stream functions */
|
||||
extern struct bStream * bsopen(bNread readPtr, void * parm);
|
||||
extern void * bsclose(struct bStream * s);
|
||||
extern int bsbufflength(struct bStream * s, int sz);
|
||||
extern int bsreadln(bstring b, struct bStream * s, char terminator);
|
||||
extern int bsreadlns(bstring r, struct bStream * s, const_bstring term);
|
||||
extern int bsread(bstring b, struct bStream * s, int n);
|
||||
extern int bsreadlna(bstring b, struct bStream * s, char terminator);
|
||||
extern int bsreadlnsa(bstring r, struct bStream * s, const_bstring term);
|
||||
extern int bsreada(bstring b, struct bStream * s, int n);
|
||||
extern int bsunread(struct bStream * s, const_bstring b);
|
||||
extern int bspeek(bstring r, const struct bStream * s);
|
||||
extern int bssplitscb(struct bStream * s, const_bstring splitStr,
|
||||
int (* cb)(void * parm, int ofs, const_bstring entry), void * parm);
|
||||
extern int bssplitstrcb(struct bStream * s, const_bstring splitStr,
|
||||
int (* cb)(void * parm, int ofs, const_bstring entry), void * parm);
|
||||
extern int bseof(const struct bStream * s);
|
||||
|
||||
struct tagbstring
|
||||
{
|
||||
int mlen;
|
||||
int slen;
|
||||
unsigned char * data;
|
||||
};
|
||||
|
||||
/* Accessor macros */
|
||||
#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
|
||||
#define blength(b) (blengthe ((b), 0))
|
||||
#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
|
||||
#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
|
||||
#define bdatae(b, e) (bdataofse (b, 0, e))
|
||||
#define bdata(b) (bdataofs (b, 0))
|
||||
#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
|
||||
#define bchar(b, p) bchare ((b), (p), '\0')
|
||||
|
||||
/* Static constant string initialization macro */
|
||||
#define bsStaticMlen(q, m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
|
||||
#if defined(_MSC_VER)
|
||||
/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
|
||||
# define bsStatic(q) bsStaticMlen(q,-32)
|
||||
#endif
|
||||
#ifndef bsStatic
|
||||
# define bsStatic(q) bsStaticMlen(q,-__LINE__)
|
||||
#endif
|
||||
|
||||
/* Static constant block parameter pair */
|
||||
#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
|
||||
|
||||
/* Reference building macros */
|
||||
#define cstr2tbstr btfromcstr
|
||||
/*
 * btfromcstr(t, s): make the struct tagbstring t (an lvalue) refer to the C string s
 * without copying it. data is set to s, slen to strlen(s) (0 when s is NULL) and
 * mlen to -1.
 */
#define btfromcstr(t, s) { \
    (t).data = (unsigned char *) (s); \
    if ((t).data) { \
        (t).slen = (int) (strlen) ((char *)(t).data); \
    } else { \
        (t).slen = 0; \
    } \
    (t).mlen = -1; \
}
|
||||
#define blk2tbstr(t, s, l) { \
|
||||
(t).data = (unsigned char *) (s); \
|
||||
(t).slen = l; \
|
||||
(t).mlen = -1; \
|
||||
}
|
||||
#define btfromblk(t, s, l) blk2tbstr(t,s,l)
|
||||
/*
 * bmid2tbstr(t, b, p, l): make the struct tagbstring t (an lvalue) refer to the
 * substring of b that starts at position p and is at most l characters long. No
 * data is copied.
 *
 * A negative p is clamped to 0 and l is reduced by the same amount; l is then
 * clamped to what remains of b after p. When b is NULL, has a NULL data pointer or
 * a negative slen, or when the clamped length is <= 0, t refers to the empty string
 * "" with slen 0. mlen is always set to -__LINE__. p and l are evaluated only when
 * b is usable.
 */
#define bmid2tbstr(t, b, p, l) { \
    const_bstring bstrtmp_s = (b); \
    if (!bstrtmp_s || !bstrtmp_s->data || bstrtmp_s->slen < 0) { \
        (t).data = (unsigned char *)""; \
        (t).slen = 0; \
    } else { \
        int bstrtmp_left = (p); \
        int bstrtmp_len = (l); \
        if (bstrtmp_left < 0) { \
            bstrtmp_len += bstrtmp_left; \
            bstrtmp_left = 0; \
        } \
        if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) { \
            bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
        } \
        if (bstrtmp_len > 0) { \
            (t).data = bstrtmp_s->data + bstrtmp_left; \
            (t).slen = bstrtmp_len; \
        } else { \
            (t).data = (unsigned char *)""; \
            (t).slen = 0; \
        } \
    } \
    (t).mlen = -__LINE__; \
}
|
||||
/*
 * btfromblkltrimws(t, s, l): make the struct tagbstring t (an lvalue) refer to the
 * block s of length l with leading whitespace (as judged by isspace) skipped.
 * No data is copied; data points at the first non-space byte, slen is the remaining
 * length and mlen is set to -__LINE__. s must be an unsigned char pointer.
 */
#define btfromblkltrimws(t, s, l) { \
    int bstrtmp_len = (l); \
    unsigned char * bstrtmp_s = (s); \
    int bstrtmp_idx = 0; \
    if (bstrtmp_s && bstrtmp_len >= 0) { \
        while (bstrtmp_idx < bstrtmp_len && isspace (bstrtmp_s[bstrtmp_idx])) { \
            bstrtmp_idx++; \
        } \
    } \
    (t).data = bstrtmp_s + bstrtmp_idx; \
    (t).slen = bstrtmp_len - bstrtmp_idx; \
    (t).mlen = -__LINE__; \
}
|
||||
/*
 * btfromblkrtrimws(t, s, l): make the struct tagbstring t (an lvalue) refer to the
 * block s of length l with trailing whitespace (as judged by isspace) dropped.
 * No data is copied; data is s, slen is the length up to and including the last
 * non-space byte, and mlen is set to -__LINE__. s must be an unsigned char pointer.
 */
#define btfromblkrtrimws(t, s, l) { \
    int bstrtmp_len = (l) - 1; \
    unsigned char * bstrtmp_s = (s); \
    if (bstrtmp_s && bstrtmp_len >= 0) { \
        while (bstrtmp_len >= 0 && isspace (bstrtmp_s[bstrtmp_len])) { \
            bstrtmp_len--; \
        } \
    } \
    (t).data = bstrtmp_s; \
    (t).slen = bstrtmp_len + 1; \
    (t).mlen = -__LINE__; \
}
|
||||
/*
 * btfromblktrimws(t, s, l): make the struct tagbstring t (an lvalue) refer to the
 * block s of length l with both leading and trailing whitespace (as judged by
 * isspace) removed. No data is copied; data points at the first non-space byte,
 * slen covers up to the last non-space byte and mlen is set to -__LINE__.
 * s must be an unsigned char pointer.
 */
#define btfromblktrimws(t, s, l) { \
    int bstrtmp_len = (l) - 1; \
    unsigned char * bstrtmp_s = (s); \
    int bstrtmp_idx = 0; \
    if (bstrtmp_s && bstrtmp_len >= 0) { \
        while (bstrtmp_idx <= bstrtmp_len && isspace (bstrtmp_s[bstrtmp_idx])) { \
            bstrtmp_idx++; \
        } \
        while (bstrtmp_len >= bstrtmp_idx && isspace (bstrtmp_s[bstrtmp_len])) { \
            bstrtmp_len--; \
        } \
    } \
    (t).data = bstrtmp_s + bstrtmp_idx; \
    (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
    (t).mlen = -__LINE__; \
}
|
||||
|
||||
/* Write protection macros */
|
||||
#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
|
||||
#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
|
||||
#define biswriteprotected(t) ((t).mlen <= 0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
3202
third_party/HLSLcc/src/cbstring/bstrlib.txt
vendored
Normal file
3202
third_party/HLSLcc/src/cbstring/bstrlib.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
29
third_party/HLSLcc/src/cbstring/license.txt
vendored
Normal file
29
third_party/HLSLcc/src/cbstring/license.txt
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
Copyright (c) 2002-2008 Paul Hsieh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of bstrlib nor the names of its contributors may be used
|
||||
to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
172
third_party/HLSLcc/src/cbstring/porting.txt
vendored
Normal file
172
third_party/HLSLcc/src/cbstring/porting.txt
vendored
Normal file
@ -0,0 +1,172 @@
|
||||
Better String library Porting Guide
|
||||
-----------------------------------
|
||||
|
||||
by Paul Hsieh
|
||||
|
||||
The bstring library is an attempt to provide improved string processing
|
||||
functionality to the C and C++ language. At the heart of the bstring library
|
||||
is the management of "bstring"s which are a significant improvement over '\0'
|
||||
terminated char buffers. See the accompanying documentation file bstrlib.txt
|
||||
for more information.
|
||||
|
||||
===============================================================================
|
||||
|
||||
Identifying the Compiler
|
||||
------------------------
|
||||
|
||||
Bstrlib has been tested on the following compilers:
|
||||
|
||||
Microsoft Visual C++
|
||||
Watcom C/C++ (32 bit flat)
|
||||
Intel's C/C++ compiler (on Windows)
|
||||
The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
|
||||
Borland C++
|
||||
Turbo C
|
||||
|
||||
There are slight differences in these compilers which require slight
|
||||
differences in the implementation of Bstrlib. These are accommodated in the
|
||||
same sources using #ifdef/#if defined() on compiler specific macros. To
|
||||
port Bstrlib to a new compiler not listed above, it is recommended that the
|
||||
same strategy be followed. If you are unaware of the compiler specific
|
||||
identifying preprocessor macro for your compiler you might find it here:
|
||||
|
||||
http://predef.sourceforge.net/precomp.html
|
||||
|
||||
Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
|
||||
|
||||
16-bit vs. 32-bit vs. 64-bit Systems
|
||||
------------------------------------
|
||||
|
||||
Bstrlib has been architected to deal with strings of length between 0 and
|
||||
INT_MAX (inclusive). Since the values of int are never higher than size_t
|
||||
there will be no issue here. Note that on most 64-bit systems int is 32-bit.
|
||||
|
||||
Dependency on The C-Library
|
||||
---------------------------
|
||||
|
||||
Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
|
||||
vsnprintf. Many free standing C compiler implementations that have a mode in
|
||||
which the C library is not available will typically not include these
|
||||
functions which will make porting Bstrlib to it onerous. Bstrlib is not
|
||||
designed for such bare bones compiler environments. This usually includes
|
||||
compilers that target ROM environments.
|
||||
|
||||
Porting Issues
|
||||
--------------
|
||||
|
||||
Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there
|
||||
are still a few porting issues. These are described below.
|
||||
|
||||
1. The vsnprintf () function.
|
||||
|
||||
Unfortunately, the earlier ANSI/ISO C standards did not include this function.
|
||||
If the compiler of interest does not support this function then the
|
||||
BSTRLIB_NOVSNP should be defined via something like:
|
||||
|
||||
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
|
||||
# if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
|
||||
# define BSTRLIB_NOVSNP
|
||||
# endif
|
||||
#endif
|
||||
|
||||
which appears at the top of bstrlib.h. Note that the bformat(a) functions
|
||||
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If
|
||||
the compiler has renamed vsnprintf() to some other named function, then
|
||||
search for the definition of the exvsnprintf macro in bstrlib.c file and be
|
||||
sure it's defined appropriately:
|
||||
|
||||
#if defined (__COMPILERVENDORSPECIFICMACRO__)
|
||||
# define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
|
||||
#else
|
||||
# define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
|
||||
#endif
|
||||
|
||||
Take notice of the return value being captured in the variable r. It is
|
||||
assumed that r exceeds n if and only if the underlying vsnprintf function has
|
||||
determined what the true maximal output length would be for output if the
|
||||
buffer were large enough to hold it. Non-modern implementations must output a
|
||||
lesser number (the macro can and should be modified to ensure this).
|
||||
|
||||
2. Weak C++ compiler.
|
||||
|
||||
C++ is a much more complicated language to implement than C. This has led
|
||||
to varying quality of compiler implementations. The weaknesses isolated in
|
||||
the initial ports are inclusion of the Standard Template Library,
|
||||
std::iostream and exception handling. By default it is assumed that the C++
|
||||
compiler supports all of these things correctly. If your compiler does not
|
||||
support one or more of these define the corresponding macro:
|
||||
|
||||
BSTRLIB_CANNOT_USE_STL
|
||||
BSTRLIB_CANNOT_USE_IOSTREAM
|
||||
BSTRLIB_DOESNT_THROW_EXCEPTIONS
|
||||
|
||||
The compiler specific detected macro should be defined at the top of
|
||||
bstrwrap.h in the Configuration defines section. Note that these disabling
|
||||
macros can be overridden with the associated enabling macro if a subsequent
|
||||
version of the compiler gains support. (For example, it's possible to rig
|
||||
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
|
||||
can be passed in as a compiler option.)
|
||||
|
||||
3. The bsafe module, and reserved words.
|
||||
|
||||
The bsafe module is in gross violation of the ANSI/ISO C standard in the
|
||||
sense that it redefines what could be implemented as reserved words on a
|
||||
given compiler. The typical problem is that a compiler may inline some of the
|
||||
functions and thus not be properly overridden by the definitions in the bsafe
|
||||
module. It is also possible that a compiler may prohibit the redefinitions in
|
||||
the bsafe module. Compiler specific action will be required to deal with
|
||||
these situations.
|
||||
|
||||
Platform Specific Files
|
||||
-----------------------
|
||||
|
||||
The makefiles for the examples are basically set up for particular
|
||||
environments for each platform. In general these makefiles are not portable
|
||||
and should be constructed as necessary from scratch for each platform.
|
||||
|
||||
Testing a port
|
||||
--------------
|
||||
|
||||
To test that a port compiles correctly do the following:
|
||||
|
||||
1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
|
||||
bsafe modules.
|
||||
2. Compile bstest against the bstrlib module.
|
||||
3. Run bstest and ensure that 0 errors are reported.
|
||||
4. Compile test against the bstrlib and bstrwrap modules.
|
||||
5. Run test and ensure that 0 errors are reported.
|
||||
6. Compile each of the examples (except for the "re" example, which may be
|
||||
complicated and is not a real test of bstrlib and except for the mfcbench
|
||||
example which is Windows specific.)
|
||||
7. Run each of the examples.
|
||||
|
||||
The builds must have 0 errors, and should have the absolute minimum number of
|
||||
warnings (in most cases can be reduced to 0.) The result of execution should
|
||||
be essentially identical on each platform.
|
||||
|
||||
Performance
|
||||
-----------
|
||||
|
||||
Different CPUs and compilers have different capabilities in terms of
|
||||
performance. It is possible for Bstrlib to assume performance
|
||||
characteristics that a platform doesn't have (since it was primarily
|
||||
developed on just one platform). The goal of Bstrlib is to provide very good
|
||||
performance on all platforms regardless of this but without resorting to
|
||||
extreme measures (such as using assembly language, or non-portable intrinsics
|
||||
or library extensions.)
|
||||
|
||||
There are two performance benchmarks that can be found in the example/
|
||||
directory. They are: cbench.c and cppbench.cpp. These are variations and
|
||||
expansions of a benchmark for another string library. They don't cover all
|
||||
string functionality, but do include the most basic functions which will be
|
||||
common in most string manipulation kernels.
|
||||
|
||||
...............................................................................
|
||||
|
||||
Feedback
|
||||
--------
|
||||
|
||||
In all cases, you may email issues found to the primary author of Bstrlib at
|
||||
the email address: websnarf@users.sourceforge.net
|
||||
|
||||
===============================================================================
|
221
third_party/HLSLcc/src/cbstring/security.txt
vendored
Normal file
221
third_party/HLSLcc/src/cbstring/security.txt
vendored
Normal file
@ -0,0 +1,221 @@
|
||||
Better String library Security Statement
|
||||
----------------------------------------
|
||||
|
||||
by Paul Hsieh
|
||||
|
||||
===============================================================================
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The Better String library (hereafter referred to as Bstrlib) is an attempt to
|
||||
provide improved string processing functionality to the C and C++ languages.
|
||||
At the heart of the Bstrlib is the management of "bstring"s which are a
|
||||
significant improvement over '\0' terminated char buffers. See the
|
||||
accompanying documentation file bstrlib.txt for more information.
|
||||
|
||||
DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
||||
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
Like any software, there is always a possibility of failure due to a flawed
|
||||
implementation. Nevertheless a good faith effort has been made to minimize
|
||||
such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an
|
||||
application secure or free from implementation failures. However, it is the
|
||||
author's conviction that use of Bstrlib can greatly facilitate the creation
|
||||
of software meeting the highest possible standards of security.
|
||||
|
||||
Part of the reason why this document has been created, is for the purpose of
|
||||
security auditing, or the creation of further "Statements on Security" for
|
||||
software that is created that uses Bstrlib. An auditor may check the claims
|
||||
below against Bstrlib, and use this as a basis for analysis of software which
|
||||
uses Bstrlib.
|
||||
|
||||
===============================================================================
|
||||
|
||||
Statement on Security
|
||||
---------------------
|
||||
|
||||
This is a document intended to give consumers of the Better String Library
|
||||
who are interested in security an idea of where the Better String Library
|
||||
stands on various security issues. Any deviation observed in the actual
|
||||
library itself from the descriptions below should be considered an
|
||||
implementation error, not a design flaw.
|
||||
|
||||
This statement is not an analytical proof of correctness or an outline of one
|
||||
but rather an assertion similar to a scientific claim or hypothesis. By use,
|
||||
testing and open independent examination (otherwise known as scientific
|
||||
falsifiability), the credibility of the claims made below can rise to the
|
||||
level of an established theory.
|
||||
|
||||
Common security issues:
|
||||
.......................
|
||||
|
||||
1. Buffer Overflows
|
||||
|
||||
The Bstrlib API allows the programmer a way to deal with strings without
|
||||
having to deal with the buffers containing them. Ordinary usage of the
|
||||
Bstrlib API itself makes buffer overflows impossible.
|
||||
|
||||
Furthermore, the Bstrlib API has a superset of basic string functionality as
|
||||
compared to the C library's char * functions, C++'s std::string class and
|
||||
Microsoft's MFC based CString class. It also has abstracted mechanisms for
|
||||
dealing with IO. This is important as it gives developers a way of migrating
|
||||
all their code from a functionality point of view.
|
||||
|
||||
2. Memory size overflow/wrap around attack
|
||||
|
||||
Bstrlib is, by design, impervious to memory size overflow attacks. The
|
||||
reason it is resilient to length overflows is that bstring lengths are
|
||||
bounded above by INT_MAX, instead of ~(size_t)0. So length addition
|
||||
overflows cause a wrap around of the integer value making them negative
|
||||
causing balloc() to fail before an erroneous operation can occur. Attempted
|
||||
conversions of char * strings which may have lengths greater than INT_MAX are
|
||||
detected and the conversion is aborted.
|
||||
|
||||
It is unknown if this property holds on machines that don't represent
|
||||
integers as 2s complement. It is recommended that Bstrlib be carefully
|
||||
audited by anyone using a system which is not 2s complement based.
|
||||
|
||||
3. Constant string protection
|
||||
|
||||
Bstrlib implements runtime enforced constant and read-only string semantics.
|
||||
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
|
||||
be modified or deallocated directly through the Bstrlib API, and this cannot
|
||||
be subverted by casting or other type coercion. This is independent of the
|
||||
use of the const_bstring data type.
|
||||
|
||||
The Bstrlib C API uses the type const_bstring to specify bstring parameters
|
||||
whose contents do not change. Although the C language cannot enforce this,
|
||||
this is nevertheless guaranteed by the implementation of the Bstrlib library
|
||||
of C functions. The C++ API enforces the const attribute on CBString types
|
||||
correctly.
|
||||
|
||||
4. Aliased bstring support
|
||||
|
||||
Bstrlib detects and supports aliased parameter management throughout the API.
|
||||
The kind of aliasing that is allowed is the one where pointers of the same
|
||||
basic type may be pointing to overlapping objects (this is the assumption the
|
||||
ANSI C99 specification makes.) Each function behaves as if all read-only
|
||||
parameters were copied to temporaries which are used in their stead before
|
||||
the function is enacted (it rarely actually does this). No function in the
|
||||
Bstrlib uses the "restrict" parameter attribute from the ANSI C99
|
||||
specification.
|
||||
|
||||
5. Information leaking
|
||||
|
||||
In bstraux.h, using the semantically equivalent macros bSecureDestroy() and
|
||||
bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively
|
||||
will ensure that stale data does not linger in the heap's free space after
|
||||
strings have been released back to memory. Created bstrings or CBStrings
|
||||
are not linked to anything external to themselves, and thus cannot expose
|
||||
deterministic data leaking. If a bstring is resized, the preimage may exist
|
||||
as a copy that is released to the heap. Thus for sensitive data, the bstring
|
||||
should be sufficiently presized before manipulated so that it is not resized.
|
||||
bSecureInput() has been supplied in bstraux.c, which can be used to obtain
|
||||
input securely without any risk of leaving any part of the input image in the
|
||||
heap except for the allocated bstring that is returned.
|
||||
|
||||
6. Memory leaking
|
||||
|
||||
Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG
|
||||
macro. User generated definitions for malloc, realloc and free can then be
|
||||
supplied which can implement special strategies for memory corruption
|
||||
detection or memory leaking. Otherwise, bstrlib does not do anything out of
|
||||
the ordinary to attempt to deal with the standard problem of memory leaking
|
||||
(i.e., losing references to allocated memory) when programming in the C and
|
||||
C++ languages. However, it does not compound the problem any more than exists
|
||||
either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib
|
||||
does not preclude the use of automatic garbage collection mechanisms such as
|
||||
the Boehm garbage collector.
|
||||
|
||||
7. Encryption
|
||||
|
||||
Bstrlib does not present any built-in encryption mechanism. However, it
|
||||
supports full binary contents in its data buffers, so any standard block
|
||||
based encryption mechanism can make direct use of bstrings/CBStrings for
|
||||
buffer management.
|
||||
|
||||
8. Double freeing
|
||||
|
||||
Freeing a pointer that is already free is an extremely rare, but nevertheless
|
||||
a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
|
||||
reboot, by calling free multiple times on already freed data using the WATCOM
|
||||
CRT.) Bstrlib invalidates the bstring header data before freeing, so that in
|
||||
many cases a double free will be detected and an error will be reported
|
||||
(though this behaviour is not guaranteed and should not be relied on).
|
||||
|
||||
Using bstrFree pervasively (instead of bdestroy) can lead to somewhat
|
||||
improved invalid free avoidance (it is completely safe whenever bstring
|
||||
instances are only stored in unique variables). For example:
|
||||
|
||||
struct tagbstring hw = bsStatic ("Hello, world");
|
||||
bstring cpHw = bstrcpy (&hw);
|
||||
|
||||
#ifdef NOT_QUITE_AS_SAFE
|
||||
bdestroy (cpHw); /* Never fail */
|
||||
bdestroy (cpHw); /* Error sometimes detected at runtime */
|
||||
bdestroy (&hw); /* Error detected at run time */
|
||||
#else
|
||||
bstrFree (cpHw); /* Never fail */
|
||||
bstrFree (cpHw); /* Will do nothing */
|
||||
bstrFree (&hw); /* Will lead to a compile time error */
|
||||
#endif
|
||||
|
||||
9. Resource based denial of service
|
||||
|
||||
bSecureInput() has been supplied in bstraux.c. It has an optional upper limit
|
||||
for input length. But unlike fgets(), it is also easily determined if the
|
||||
buffer has been truncated early. In this way, a program can set an upper limit
|
||||
on input sizes while still allowing for implementing context specific
|
||||
truncation semantics (i.e., does the program consume but dump the extra
|
||||
input, or does it consume it in later inputs?)
|
||||
|
||||
10. Mixing char *'s and bstrings
|
||||
|
||||
The bstring and char * representations are not identical. So there is a risk
|
||||
when converting back and forth that data may be lost. Essentially bstrings can
|
||||
contain '\0' as a valid non-terminating character, while char * strings
|
||||
cannot and in fact must use the character as a terminator. The risk of data
|
||||
loss is very low, since:
|
||||
|
||||
A) the simple method of only using bstrings in a char * semantically
|
||||
compatible way is both easy to achieve and pervasively supported.
|
||||
B) obtaining '\0' content in a string is either deliberate or indicative
|
||||
of another, likely more serious problem in the code.
|
||||
C) the library comes with various functions which deal with this issue
|
||||
(namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
|
||||
|
||||
Marginal security issues:
|
||||
.........................
|
||||
|
||||
11. 8-bit versus 9-bit portability
|
||||
|
||||
Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent
|
||||
possible to avoid portability problems. However, Bstrlib has not been tested
|
||||
on any system that does not represent char as 8-bits. So whether or not it
|
||||
works on 9-bit systems is an open question. It is recommended that Bstrlib be
|
||||
carefully audited by anyone using a system in which CHAR_BIT is not 8.
|
||||
|
||||
12. EBCDIC/ASCII/UTF-8 data representation attacks.
|
||||
|
||||
Bstrlib uses ctype.h functions to ensure that it remains portable to non-
|
||||
ASCII systems. It also checks range to make sure it is well defined even for
|
||||
data that ANSI does not define for the ctype functions.
|
||||
|
||||
Obscure issues:
|
||||
...............
|
||||
|
||||
13. Data attributes
|
||||
|
||||
There is no support for a Perl-like "taint" attribute, however, an example of
|
||||
how to do this using C++'s type system is given as an example.
|
||||
|
1635
third_party/HLSLcc/src/decode.cpp
vendored
Normal file
1635
third_party/HLSLcc/src/decode.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
151
third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h
vendored
Normal file
151
third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h
vendored
Normal file
@ -0,0 +1,151 @@
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct Instruction;
|
||||
class Operand;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
namespace ControlFlow
|
||||
{
|
||||
class BasicBlock;
|
||||
|
||||
class ControlFlowGraph
|
||||
{
|
||||
friend class BasicBlock;
|
||||
public:
|
||||
ControlFlowGraph()
|
||||
: m_BlockMap()
|
||||
, m_BlockStorage()
|
||||
{}
|
||||
|
||||
typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;
|
||||
|
||||
const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction);
|
||||
|
||||
// Only works for instructions that start the basic block
|
||||
const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;
|
||||
|
||||
// non-const version for BasicBlock
|
||||
BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction);
|
||||
|
||||
const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; }
|
||||
private:
|
||||
|
||||
// Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block
|
||||
typedef std::map<const Instruction *, BasicBlock *> BasicBlockMap;
|
||||
|
||||
BasicBlockMap m_BlockMap;
|
||||
|
||||
// auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these
|
||||
BasicBlockStorage m_BlockStorage;
|
||||
};
|
||||
|
||||
|
||||
class BasicBlock
|
||||
{
|
||||
friend class ControlFlowGraph;
|
||||
public:
|
||||
// A set of register indices, one per each vec4 component per register
|
||||
typedef std::set<uint32_t> RegisterSet;
|
||||
// The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block
|
||||
typedef std::set<const Instruction *> ConnectionSet;
|
||||
|
||||
struct Definition
|
||||
{
|
||||
Definition(const Instruction* i = nullptr, const Operand* o = nullptr)
|
||||
: m_Instruction(i)
|
||||
, m_Operand(o)
|
||||
{}
|
||||
|
||||
Definition(const Definition& a) = default;
|
||||
Definition(Definition&& a) = default;
|
||||
~Definition() = default;
|
||||
|
||||
Definition& operator=(const Definition& a) = default;
|
||||
Definition& operator=(Definition&& a) = default;
|
||||
|
||||
bool operator==(const Definition& a) const
|
||||
{
|
||||
if (a.m_Instruction != m_Instruction)
|
||||
return false;
|
||||
return a.m_Operand == m_Operand;
|
||||
}
|
||||
|
||||
bool operator!=(const Definition& a) const
|
||||
{
|
||||
if (a.m_Instruction == m_Instruction)
|
||||
return false;
|
||||
return a.m_Operand != m_Operand;
|
||||
}
|
||||
|
||||
bool operator<(const Definition& a) const
|
||||
{
|
||||
if (m_Instruction != a.m_Instruction)
|
||||
return m_Instruction < a.m_Instruction;
|
||||
return m_Operand < a.m_Operand;
|
||||
}
|
||||
|
||||
const Instruction *m_Instruction;
|
||||
const Operand *m_Operand;
|
||||
};
|
||||
|
||||
typedef std::set<Definition> ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable
|
||||
typedef std::map<uint32_t, ReachableDefinitionsPerVariable> ReachableVariables; // A VisibleDefinitionSet for each variable*component.
|
||||
|
||||
const Instruction *First() const { return m_First; }
|
||||
const Instruction *Last() const { return m_Last; }
|
||||
|
||||
const RegisterSet &UEVar() const { return m_UEVar; }
|
||||
const RegisterSet &VarKill() const { return m_VarKill; }
|
||||
|
||||
const ConnectionSet &Preceding() const { return m_Preceding; }
|
||||
const ConnectionSet &Succeeding() const { return m_Succeeding; }
|
||||
|
||||
const ReachableVariables &DEDef() const { return m_DEDef; }
|
||||
const ReachableVariables &Reachable() const { return m_Reachable; }
|
||||
|
||||
// Helper function: Do union of 2 ReachableVariables, store result in a.
|
||||
static void RVarUnion(ReachableVariables &a, const ReachableVariables &b);
|
||||
|
||||
private:
|
||||
|
||||
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
|
||||
BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd);
|
||||
|
||||
// Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
|
||||
void Build();
|
||||
|
||||
bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed.
|
||||
|
||||
|
||||
BasicBlock * AddChildBasicBlock(const Instruction *psFirst);
|
||||
|
||||
private:
|
||||
ControlFlowGraph &m_Graph; // The graph object containing this block
|
||||
|
||||
const Instruction *m_First; // The first instruction in the basic block
|
||||
const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction
|
||||
const Instruction *m_End; // past-the-end pointer
|
||||
|
||||
RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
|
||||
RegisterSet m_VarKill; // Set of variables that are defined in this block.
|
||||
|
||||
ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG
|
||||
ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG
|
||||
|
||||
ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set.
|
||||
|
||||
ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block.
|
||||
};
|
||||
}
|
||||
}
|
30
third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h
vendored
Normal file
30
third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h
vendored
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
struct Instruction;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
namespace ControlFlow
|
||||
{
|
||||
class Utils
|
||||
{
|
||||
public:
|
||||
// For a given flow-control instruction, find the corresponding jump location:
|
||||
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
|
||||
// For ELSE, find same level ENDIF + 1
|
||||
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
|
||||
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
|
||||
// For ENDLOOP, find previous same-level LOOP + 1
|
||||
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
|
||||
// For CONTINUE/C the previous LOOP + 1
|
||||
// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block.
|
||||
// Note that CASE labels fall through.
|
||||
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
|
||||
// If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH
|
||||
// If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it.
|
||||
static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0);
|
||||
|
||||
static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0);
|
||||
};
|
||||
}
|
||||
}
|
15
third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h
vendored
Normal file
15
third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "include/ShaderInfo.h"
|
||||
#include <vector>
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
struct Instruction;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
namespace DataTypeAnalysis
|
||||
{
|
||||
void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> &instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results);
|
||||
}
|
||||
}
|
118
third_party/HLSLcc/src/internal_includes/Declaration.h
vendored
Normal file
118
third_party/HLSLcc/src/internal_includes/Declaration.h
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
// Four 32-bit words. Used as the element type of
// Declaration::asImmediateConstBuffer in this header.
struct ICBVec4_TAG
{
    uint32_t a;
    uint32_t b;
    uint32_t c;
    uint32_t d;
};
typedef struct ICBVec4_TAG ICBVec4;
|
||||
|
||||
// Access flags. Each flag is a distinct single bit, so they can be OR-ed together.
// NOTE(review): presumably stored in Declaration::sUAV.ui32AccessFlags - confirm against the users.
#define ACCESS_FLAG_READ       0x1
#define ACCESS_FLAG_WRITE      0x2
#define ACCESS_FLAG_ATOMIC     0x4
|
||||
|
||||
struct Declaration
|
||||
{
|
||||
Declaration() :
|
||||
eOpcode(OPCODE_INVALID),
|
||||
ui32NumOperands(0),
|
||||
ui32BufferStride(0),
|
||||
ui32TableLength(0),
|
||||
ui32IsShadowTex(0)
|
||||
{}
|
||||
|
||||
OPCODE_TYPE eOpcode;
|
||||
|
||||
uint32_t ui32NumOperands;
|
||||
|
||||
Operand asOperands[2];
|
||||
|
||||
std::vector<ICBVec4> asImmediateConstBuffer;
|
||||
//The declaration can set one of these
|
||||
//values depending on the opcode.
|
||||
union
|
||||
{
|
||||
uint32_t ui32GlobalFlags;
|
||||
uint32_t ui32NumTemps;
|
||||
RESOURCE_DIMENSION eResourceDimension;
|
||||
INTERPOLATION_MODE eInterpolation;
|
||||
PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology;
|
||||
PRIMITIVE eInputPrimitive;
|
||||
uint32_t ui32MaxOutputVertexCount;
|
||||
TESSELLATOR_DOMAIN eTessDomain;
|
||||
TESSELLATOR_PARTITIONING eTessPartitioning;
|
||||
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
|
||||
uint32_t aui32WorkGroupSize[3];
|
||||
uint32_t ui32HullPhaseInstanceCount;
|
||||
float fMaxTessFactor;
|
||||
uint32_t ui32IndexRange;
|
||||
uint32_t ui32GSInstanceCount;
|
||||
SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode.
|
||||
|
||||
struct Interface_TAG
|
||||
{
|
||||
uint32_t ui32InterfaceID;
|
||||
uint32_t ui32NumFuncTables;
|
||||
uint32_t ui32ArraySize;
|
||||
} iface;
|
||||
} value;
|
||||
|
||||
uint32_t ui32BufferStride;
|
||||
|
||||
struct UAV_TAG
|
||||
{
|
||||
UAV_TAG() :
|
||||
ui32GloballyCoherentAccess(0),
|
||||
bCounter(0),
|
||||
Type(RETURN_TYPE_UNORM),
|
||||
ui32NumComponents(0),
|
||||
ui32AccessFlags(0)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t ui32GloballyCoherentAccess;
|
||||
uint8_t bCounter;
|
||||
RESOURCE_RETURN_TYPE Type;
|
||||
uint32_t ui32NumComponents;
|
||||
uint32_t ui32AccessFlags;
|
||||
} sUAV;
|
||||
|
||||
struct TGSM_TAG
|
||||
{
|
||||
uint32_t ui32Stride;
|
||||
uint32_t ui32Count;
|
||||
|
||||
TGSM_TAG() :
|
||||
ui32Stride(0),
|
||||
ui32Count(0)
|
||||
{
|
||||
}
|
||||
} sTGSM;
|
||||
|
||||
struct IndexableTemp_TAG
|
||||
{
|
||||
uint32_t ui32RegIndex;
|
||||
uint32_t ui32RegCount;
|
||||
uint32_t ui32RegComponentSize;
|
||||
|
||||
IndexableTemp_TAG() :
|
||||
ui32RegIndex(0),
|
||||
ui32RegCount(0),
|
||||
ui32RegComponentSize(0)
|
||||
{
|
||||
}
|
||||
} sIdxTemp;
|
||||
|
||||
uint32_t ui32TableLength;
|
||||
|
||||
uint32_t ui32IsShadowTex;
|
||||
|
||||
// Set indexed by sampler register number.
|
||||
std::set<uint32_t> samplersUsed;
|
||||
};
|
81
third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h
vendored
Normal file
81
third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "bstrlib.h"
|
||||
|
||||
class Shader;
|
||||
class GLSLCrossDependencyData;
|
||||
class ShaderPhase;
|
||||
class Translator;
|
||||
class Operand;
|
||||
class HLSLccReflection;
|
||||
|
||||
class HLSLCrossCompilerContext
|
||||
{
|
||||
public:
|
||||
HLSLCrossCompilerContext(HLSLccReflection &refl) :
|
||||
glsl(nullptr),
|
||||
extensions(nullptr),
|
||||
beforeMain(nullptr),
|
||||
currentGLSLString(nullptr),
|
||||
currentPhase(0),
|
||||
indent(0),
|
||||
flags(0),
|
||||
psShader(nullptr),
|
||||
psDependencies(nullptr),
|
||||
inputPrefix(nullptr),
|
||||
outputPrefix(nullptr),
|
||||
psTranslator(nullptr),
|
||||
m_Reflection(refl)
|
||||
{}
|
||||
|
||||
bstring glsl;
|
||||
bstring extensions;
|
||||
bstring beforeMain;
|
||||
|
||||
bstring* currentGLSLString;//either glsl or earlyMain of current phase
|
||||
|
||||
uint32_t currentPhase;
|
||||
|
||||
int indent;
|
||||
unsigned int flags;
|
||||
|
||||
// Helper functions for checking flags
|
||||
// Returns true if VULKAN_BINDINGS flag is set
|
||||
bool IsVulkan() const;
|
||||
|
||||
// Helper functions for checking flags
|
||||
// Returns true if HLSLCC_FLAG_NVN_TARGET flag is set
|
||||
bool IsSwitch() const;
|
||||
|
||||
Shader* psShader;
|
||||
GLSLCrossDependencyData* psDependencies;
|
||||
const char *inputPrefix; // Prefix for shader inputs
|
||||
const char *outputPrefix; // Prefix for shader outputs
|
||||
|
||||
void DoDataTypeAnalysis(ShaderPhase *psPhase);
|
||||
void ReserveFramebufferFetchInputs();
|
||||
|
||||
void ClearDependencyData();
|
||||
|
||||
void AddIndentation();
|
||||
|
||||
// Currently active translator
|
||||
Translator *psTranslator;
|
||||
|
||||
HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info
|
||||
|
||||
// Retrieve the name for which the input or output is declared as. Takes into account possible redirections.
|
||||
std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const;
|
||||
std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const;
|
||||
|
||||
bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
|
||||
|
||||
bool RequireExtension(const std::string &extName);
|
||||
bool EnableExtension(const std::string &extName);
|
||||
|
||||
private:
|
||||
std::set<std::string> m_EnabledExtensions;
|
||||
};
|
134
third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h
vendored
Normal file
134
third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h
vendored
Normal file
@ -0,0 +1,134 @@
|
||||
#pragma once
|
||||
#include "hlslcc.h"
|
||||
#include "bstrlib.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Operand.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
struct ConstantBuffer;
|
||||
|
||||
namespace HLSLcc
|
||||
{
|
||||
uint32_t GetNumberBitsSet(uint32_t a);
|
||||
|
||||
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType);
|
||||
|
||||
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags);
|
||||
|
||||
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
|
||||
|
||||
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision);
|
||||
|
||||
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components);
|
||||
|
||||
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
|
||||
|
||||
void AddSwizzleUsingElementCount(bstring dest, uint32_t count);
|
||||
|
||||
int WriteMaskToComponentCount(uint32_t writeMask);
|
||||
|
||||
uint32_t BuildComponentMaskFromElementCount(int count);
|
||||
|
||||
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
|
||||
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src);
|
||||
|
||||
// Convert resource return type to SVT_ flags
|
||||
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType);
|
||||
|
||||
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
|
||||
|
||||
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type);
|
||||
|
||||
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type);
|
||||
|
||||
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
|
||||
|
||||
bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);
|
||||
|
||||
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB);
|
||||
|
||||
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim);
|
||||
|
||||
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b);
|
||||
|
||||
// Returns true if the instruction adds 1 to the destination temp register
|
||||
bool IsAddOneInstruction(const Instruction *psInst);
|
||||
|
||||
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
|
||||
|
||||
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf);
|
||||
|
||||
// Helper function to print floats with full precision
|
||||
void PrintFloat(bstring b, float f);
|
||||
|
||||
bstring GetEarlyMain(HLSLCrossCompilerContext *psContext);
|
||||
bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext);
|
||||
|
||||
// Flags for ForeachOperand
|
||||
// Process suboperands
|
||||
#define FEO_FLAG_SUBOPERAND 1
|
||||
// Process src operands
|
||||
#define FEO_FLAG_SRC_OPERAND 2
|
||||
// Process destination operands
|
||||
#define FEO_FLAG_DEST_OPERAND 4
|
||||
// Convenience: Process all operands, both src and dest, and all suboperands
|
||||
#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND)
|
||||
|
||||
// For_each for all operands within a range of instructions. Flags above.
|
||||
template<typename ItrType, typename F> void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback)
|
||||
{
|
||||
ItrType inst = _begin;
|
||||
while (inst != _end)
|
||||
{
|
||||
uint32_t i, k;
|
||||
|
||||
if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
|
||||
{
|
||||
for (i = 0; i < inst->ui32FirstSrc; i++)
|
||||
{
|
||||
if (flags & FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
for (k = 0; k < MAX_SUB_OPERANDS; k++)
|
||||
{
|
||||
if (inst->asOperands[i].m_SubOperands[k].get())
|
||||
{
|
||||
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (flags & FEO_FLAG_DEST_OPERAND)
|
||||
{
|
||||
callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
|
||||
{
|
||||
for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++)
|
||||
{
|
||||
if (flags & FEO_FLAG_SUBOPERAND)
|
||||
{
|
||||
for (k = 0; k < MAX_SUB_OPERANDS; k++)
|
||||
{
|
||||
if (inst->asOperands[i].m_SubOperands[k].get())
|
||||
{
|
||||
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (flags & FEO_FLAG_SRC_OPERAND)
|
||||
{
|
||||
callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inst++;
|
||||
}
|
||||
}
|
||||
}
|
184
third_party/HLSLcc/src/internal_includes/Instruction.h
vendored
Normal file
184
third_party/HLSLcc/src/internal_includes/Instruction.h
vendored
Normal file
@ -0,0 +1,184 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/Operand.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
#include <memory>
|
||||
|
||||
#define ATOMIC_ADDRESS_BASIC 0
|
||||
#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1
|
||||
#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2
|
||||
|
||||
#define TEXSMP_FLAG_NONE 0x0
|
||||
#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand
|
||||
#define TEXSMP_FLAG_DEPTHCOMPARE 0x2
|
||||
#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0
|
||||
#define TEXSMP_FLAG_BIAS 0x8
|
||||
#define TEXSMP_FLAG_GRAD 0x10
|
||||
//Gather specific flags
|
||||
#define TEXSMP_FLAG_GATHER 0x20
|
||||
#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
Instruction() :
|
||||
eOpcode(OPCODE_NOP),
|
||||
eBooleanTestType(INSTRUCTION_TEST_ZERO),
|
||||
ui32NumOperands(0),
|
||||
ui32FirstSrc(0),
|
||||
m_Uses(),
|
||||
m_SkipTranslation(false),
|
||||
m_InductorRegister(0),
|
||||
bSaturate(0),
|
||||
ui32SyncFlags(0),
|
||||
ui32PreciseMask(0),
|
||||
ui32FuncIndexWithinInterface(0),
|
||||
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
|
||||
bAddressOffset(0),
|
||||
iUAddrOffset(0),
|
||||
iVAddrOffset(0),
|
||||
iWAddrOffset(0),
|
||||
xType(RETURN_TYPE_UNUSED),
|
||||
yType(RETURN_TYPE_UNUSED),
|
||||
zType(RETURN_TYPE_UNUSED),
|
||||
wType(RETURN_TYPE_UNUSED),
|
||||
eResDim(RESOURCE_DIMENSION_UNKNOWN),
|
||||
iCausedSplit(0),
|
||||
id(0)
|
||||
{
|
||||
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
|
||||
}
|
||||
|
||||
// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
|
||||
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) :
|
||||
ui32SyncFlags(0),
|
||||
bSaturate(0),
|
||||
ui32PreciseMask(0),
|
||||
ui32FuncIndexWithinInterface(0),
|
||||
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
|
||||
bAddressOffset(0),
|
||||
iUAddrOffset(0),
|
||||
iVAddrOffset(0),
|
||||
iWAddrOffset(0),
|
||||
xType(RETURN_TYPE_UNUSED),
|
||||
yType(RETURN_TYPE_UNUSED),
|
||||
zType(RETURN_TYPE_UNUSED),
|
||||
wType(RETURN_TYPE_UNUSED),
|
||||
eResDim(RESOURCE_DIMENSION_UNKNOWN),
|
||||
iCausedSplit(0)
|
||||
{
|
||||
id = _id;
|
||||
eOpcode = opcode;
|
||||
eBooleanTestType = INSTRUCTION_TEST_ZERO;
|
||||
ui32FirstSrc = 0;
|
||||
ui32NumOperands = 0;
|
||||
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
|
||||
m_SkipTranslation = false;
|
||||
m_InductorRegister = 0;
|
||||
|
||||
if (reg1Mask == 0)
|
||||
return;
|
||||
|
||||
ui32NumOperands++;
|
||||
asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1;
|
||||
asOperands[0].ui32CompMask = reg1Mask;
|
||||
asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg2Mask == 0)
|
||||
return;
|
||||
|
||||
ui32FirstSrc = 1;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2;
|
||||
asOperands[1].ui32CompMask = reg2Mask;
|
||||
asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg3Mask == 0)
|
||||
return;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3;
|
||||
asOperands[2].ui32CompMask = reg3Mask;
|
||||
asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
|
||||
if (reg4Mask == 0)
|
||||
return;
|
||||
ui32NumOperands++;
|
||||
|
||||
asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
|
||||
asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4;
|
||||
asOperands[3].ui32CompMask = reg4Mask;
|
||||
asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
|
||||
}
|
||||
|
||||
// Returns true if this instruction is a conditional branch
|
||||
bool IsConditionalBranchInstruction() const
|
||||
{
|
||||
switch (eOpcode)
|
||||
{
|
||||
case OPCODE_IF:
|
||||
case OPCODE_BREAKC:
|
||||
case OPCODE_CONTINUEC:
|
||||
case OPCODE_RETC:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const;
|
||||
|
||||
// Flags for ChangeOperandTempRegister
|
||||
#define UD_CHANGE_SUBOPERANDS 1
|
||||
#define UD_CHANGE_MAIN_OPERAND 2
|
||||
#define UD_CHANGE_ALL 3
|
||||
|
||||
void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase);
|
||||
|
||||
|
||||
OPCODE_TYPE eOpcode;
|
||||
INSTRUCTION_TEST_BOOLEAN eBooleanTestType;
|
||||
uint32_t ui32SyncFlags;
|
||||
uint32_t ui32NumOperands;
|
||||
uint32_t ui32FirstSrc;
|
||||
Operand asOperands[6];
|
||||
uint32_t bSaturate;
|
||||
uint32_t ui32PreciseMask;
|
||||
uint32_t ui32FuncIndexWithinInterface;
|
||||
RESINFO_RETURN_TYPE eResInfoReturnType;
|
||||
|
||||
int bAddressOffset;
|
||||
int8_t iUAddrOffset;
|
||||
int8_t iVAddrOffset;
|
||||
int8_t iWAddrOffset;
|
||||
RESOURCE_RETURN_TYPE xType, yType, zType, wType;
|
||||
RESOURCE_DIMENSION eResDim;
|
||||
int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
|
||||
|
||||
struct Use
|
||||
{
|
||||
Use() : m_Inst(0), m_Op(0) {}
|
||||
Use(const Use& a) = default;
|
||||
Use(Use&& a) = default;
|
||||
Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
|
||||
~Use() = default;
|
||||
|
||||
Use& operator=(const Use& a) = default;
|
||||
Use& operator=(Use&& a) = default;
|
||||
|
||||
Instruction* m_Inst; // The instruction that references the result of this instruction
|
||||
Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand.
|
||||
};
|
||||
|
||||
std::vector<Use> m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg.
|
||||
|
||||
Instruction* m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment.
|
||||
bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation)
|
||||
uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it
|
||||
|
||||
uint64_t id;
|
||||
};
|
8
third_party/HLSLcc/src/internal_includes/LoopTransform.h
vendored
Normal file
8
third_party/HLSLcc/src/internal_includes/LoopTransform.h
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
class ShaderPhase;
|
||||
class HLSLCrossCompilerContext;
|
||||
namespace HLSLcc
|
||||
{
|
||||
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase);
|
||||
}
|
150
third_party/HLSLcc/src/internal_includes/Operand.h
vendored
Normal file
150
third_party/HLSLcc/src/internal_includes/Operand.h
vendored
Normal file
@ -0,0 +1,150 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/tokens.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
enum { MAX_SUB_OPERANDS = 3 };
|
||||
class Operand;
|
||||
class HLSLCrossCompilerContext;
|
||||
struct Instruction;
|
||||
|
||||
#if _MSC_VER
|
||||
// We want to disable the "array will be default-initialized" warning, as that's exactly what we want
|
||||
#pragma warning(disable: 4351)
|
||||
#endif
|
||||
|
||||
class Operand
|
||||
{
|
||||
public:
|
||||
typedef std::shared_ptr<Operand> SubOperandPtr;
|
||||
|
||||
Operand()
|
||||
:
|
||||
iExtended(),
|
||||
eType(),
|
||||
eModifier(),
|
||||
eMinPrecision(),
|
||||
iIndexDims(),
|
||||
iWriteMask(),
|
||||
iGSInput(),
|
||||
iPSInOut(),
|
||||
iWriteMaskEnabled(),
|
||||
iArrayElements(),
|
||||
iNumComponents(),
|
||||
eSelMode(),
|
||||
ui32CompMask(),
|
||||
ui32Swizzle(),
|
||||
aui32Swizzle(),
|
||||
aui32ArraySizes(),
|
||||
ui32RegisterNumber(),
|
||||
afImmediates(),
|
||||
adImmediates(),
|
||||
eSpecialName(),
|
||||
specialName(),
|
||||
eIndexRep(),
|
||||
m_SubOperands(),
|
||||
aeDataType(),
|
||||
m_Rebase(0),
|
||||
m_Size(0),
|
||||
m_Defines(),
|
||||
m_ForLoopInductorName(0)
|
||||
#ifdef _DEBUG
|
||||
, id(0)
|
||||
#endif
|
||||
{}
|
||||
|
||||
// Retrieve the mask of all the components this operand accesses (either reads from or writes to).
|
||||
// Note that destination writemask does affect the effective access mask.
|
||||
uint32_t GetAccessMask() const;
|
||||
|
||||
// Returns the index of the highest accessed component, based on component mask
|
||||
int GetMaxComponent() const;
|
||||
|
||||
bool IsSwizzleReplicated() const;
|
||||
|
||||
// Get the number of elements returned by operand, taking additional component mask into account
|
||||
//e.g.
|
||||
//.z = 1
|
||||
//.x = 1
|
||||
//.yw = 2
|
||||
uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const;
|
||||
|
||||
// When this operand is used as an input declaration, how many components does it have?
|
||||
int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const;
|
||||
|
||||
// Retrieve the operand data type.
|
||||
SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const;
|
||||
|
||||
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
|
||||
int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const;
|
||||
// Same as above but with explicit shader type and phase
|
||||
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
|
||||
|
||||
// Find the operand that contains the dynamic index for this operand (array in constant buffer).
|
||||
// When isAoS is true, we'll try to find the original index var to avoid additional calculations.
|
||||
// needsIndexCalcRevert output will tell if we need to divide the value to get the correct index.
|
||||
Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const;
|
||||
|
||||
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
|
||||
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);
|
||||
|
||||
int iExtended;
|
||||
OPERAND_TYPE eType;
|
||||
OPERAND_MODIFIER eModifier;
|
||||
OPERAND_MIN_PRECISION eMinPrecision;
|
||||
int iIndexDims;
|
||||
int iWriteMask;
|
||||
int iGSInput;
|
||||
int iPSInOut;
|
||||
int iWriteMaskEnabled;
|
||||
int iArrayElements;
|
||||
int iNumComponents;
|
||||
|
||||
OPERAND_4_COMPONENT_SELECTION_MODE eSelMode;
|
||||
uint32_t ui32CompMask;
|
||||
uint32_t ui32Swizzle;
|
||||
uint32_t aui32Swizzle[4];
|
||||
|
||||
uint32_t aui32ArraySizes[3];
|
||||
uint32_t ui32RegisterNumber;
|
||||
//If eType is OPERAND_TYPE_IMMEDIATE32
|
||||
float afImmediates[4];
|
||||
//If eType is OPERAND_TYPE_IMMEDIATE64
|
||||
double adImmediates[4];
|
||||
|
||||
SPECIAL_NAME eSpecialName;
|
||||
std::string specialName;
|
||||
|
||||
OPERAND_INDEX_REPRESENTATION eIndexRep[3];
|
||||
|
||||
SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS];
|
||||
|
||||
//One type for each component.
|
||||
SHADER_VARIABLE_TYPE aeDataType[4];
|
||||
|
||||
uint32_t m_Rebase; // Rebase value, for constant array accesses.
|
||||
uint32_t m_Size; // Component count, only for constant array access.
|
||||
|
||||
struct Define
|
||||
{
|
||||
Define() : m_Inst(0), m_Op(0) {}
|
||||
Define(const Define& a) = default;
|
||||
Define(Define&& a) = default;
|
||||
Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
|
||||
~Define() = default;
|
||||
|
||||
Define& operator=(const Define& other) = default;
|
||||
Define& operator=(Define&& other) = default;
|
||||
|
||||
Instruction* m_Inst; // Instruction that writes to the temp
|
||||
Operand* m_Op; // The (destination) operand within that instruction.
|
||||
};
|
||||
|
||||
std::vector<Define> m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP)
|
||||
uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber)
|
||||
|
||||
#ifdef _DEBUG
|
||||
uint64_t id;
|
||||
#endif
|
||||
};
|
255
third_party/HLSLcc/src/internal_includes/Shader.h
vendored
Normal file
255
third_party/HLSLcc/src/internal_includes/Shader.h
vendored
Normal file
@ -0,0 +1,255 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#include "growing_array.h"
|
||||
#include "internal_includes/tokens.h"
|
||||
#include "internal_includes/reflect.h"
|
||||
#include "include/ShaderInfo.h"
|
||||
#include "internal_includes/Instruction.h"
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/ControlFlowGraph.h"
|
||||
#include "bstrlib.h"
|
||||
|
||||
struct ConstantArrayChunk
|
||||
{
|
||||
ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {}
|
||||
ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
|
||||
: m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0)
|
||||
{
|
||||
m_UseSites.push_back(firstUse);
|
||||
}
|
||||
|
||||
uint32_t m_Size;
|
||||
uint32_t m_AccessMask;
|
||||
uint32_t m_Rebase;
|
||||
uint32_t m_ComponentCount;
|
||||
|
||||
std::vector<Operand *> m_UseSites;
|
||||
};
|
||||
typedef std::multimap<uint32_t, ConstantArrayChunk> ChunkMap;
|
||||
|
||||
struct ConstantArrayInfo
|
||||
{
|
||||
ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {}
|
||||
|
||||
Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array
|
||||
ChunkMap m_Chunks; // map of <starting offset, chunk info>, same start offset might have multiple entries for different access masks
|
||||
};
|
||||
|
||||
class ShaderPhase
|
||||
{
|
||||
public:
|
||||
ShaderPhase()
|
||||
:
|
||||
ePhase(MAIN_PHASE),
|
||||
ui32InstanceCount(0),
|
||||
postShaderCode(),
|
||||
hasPostShaderCode(0),
|
||||
earlyMain(),
|
||||
ui32OrigTemps(0),
|
||||
ui32TotalTemps(0),
|
||||
psTempDeclaration(NULL),
|
||||
pui32SplitInfo(),
|
||||
peTempTypes(),
|
||||
acInputNeedsRedirect(),
|
||||
acOutputNeedsRedirect(),
|
||||
acPatchConstantsNeedsRedirect(),
|
||||
m_CFG(),
|
||||
m_CFGInitialized(false),
|
||||
m_NextFreeTempRegister(1),
|
||||
m_NextTexCoordTemp(0)
|
||||
{}
|
||||
|
||||
void ResolveUAVProperties(const ShaderInfo& sInfo);
|
||||
|
||||
void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier
|
||||
|
||||
void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller
|
||||
|
||||
void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first!
|
||||
|
||||
ConstantArrayInfo m_ConstantArrayInfo;
|
||||
|
||||
std::vector<Declaration> psDecl;
|
||||
std::vector<Instruction> psInst;
|
||||
|
||||
SHADER_PHASE_TYPE ePhase;
|
||||
uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1.
|
||||
bstring postShaderCode;//End of main or before emit()
|
||||
int hasPostShaderCode;
|
||||
|
||||
bstring earlyMain;//Code to be inserted at the start of phase
|
||||
|
||||
uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared
|
||||
uint32_t ui32TotalTemps; // The number of temporaries this phase has now
|
||||
Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode
|
||||
|
||||
// The split table is a table containing the index of the original register this register was split out from, or 0xffffffff
|
||||
// Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count
|
||||
std::vector<uint32_t> pui32SplitInfo;
|
||||
std::vector<SHADER_VARIABLE_TYPE> peTempTypes;
|
||||
|
||||
// These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together.
|
||||
std::vector<unsigned char> acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared.
|
||||
std::vector<unsigned char> acOutputNeedsRedirect; // Same for outputs
|
||||
std::vector<unsigned char> acPatchConstantsNeedsRedirect; // Same for patch constants
|
||||
|
||||
// Get the Control Flow Graph for this phase, build it if necessary.
|
||||
HLSLcc::ControlFlow::ControlFlowGraph &GetCFG();
|
||||
|
||||
uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops.
|
||||
uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds
|
||||
|
||||
private:
|
||||
bool m_CFGInitialized;
|
||||
HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
|
||||
};
|
||||
|
||||
class Shader
|
||||
{
|
||||
public:
|
||||
|
||||
Shader()
|
||||
:
|
||||
ui32MajorVersion(0),
|
||||
ui32MinorVersion(0),
|
||||
eShaderType(INVALID_SHADER),
|
||||
eTargetLanguage(LANG_DEFAULT),
|
||||
extensions(0),
|
||||
fp64(0),
|
||||
ui32ShaderLength(0),
|
||||
aui32FuncTableToFuncPointer(),
|
||||
aui32FuncBodyToFuncTable(),
|
||||
funcTable(),
|
||||
funcPointer(),
|
||||
ui32NextClassFuncName(),
|
||||
pui32FirstToken(NULL),
|
||||
asPhases(),
|
||||
sInfo(),
|
||||
abScalarInput(),
|
||||
abScalarOutput(),
|
||||
aIndexedInput(),
|
||||
aIndexedOutput(),
|
||||
aIndexedInputParents(),
|
||||
aeResourceDims(),
|
||||
acInputDeclared(),
|
||||
acOutputDeclared(),
|
||||
aiOpcodeUsed(NUM_OPCODES, 0),
|
||||
ui32CurrentVertexOutputStream(0),
|
||||
textureSamplers(),
|
||||
m_DummySamplerDeclared(false),
|
||||
maxSemanticIndex(0)
|
||||
{
|
||||
}
|
||||
|
||||
// Retrieve the number of components the temp register has.
|
||||
uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const;
|
||||
|
||||
//Hull shaders have multiple phases.
|
||||
//Each phase has its own temps.
|
||||
//Convert from per-phase temps to global temps.
|
||||
void ConsolidateHullTempVars();
|
||||
|
||||
// Detect temp registers per data type that are actually used.
|
||||
void PruneTempRegisters();
|
||||
|
||||
// Check if inputs and outputs are accessed across semantic boundaries
|
||||
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
|
||||
void AnalyzeIOOverlap();
|
||||
|
||||
// Compute maxSemanticIndex based on the results of AnalyzeIOOverlap
|
||||
void SetMaxSemanticIndex();
|
||||
|
||||
// Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs.
|
||||
void ForcePositionToHighp();
|
||||
|
||||
void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used.
|
||||
|
||||
void ExpandSWAPCs();
|
||||
|
||||
uint32_t ui32MajorVersion;
|
||||
uint32_t ui32MinorVersion;
|
||||
SHADER_TYPE eShaderType;
|
||||
|
||||
GLLang eTargetLanguage;
|
||||
const struct GlExtensions *extensions;
|
||||
|
||||
int fp64;
|
||||
|
||||
//DWORDs in program code, including version and length tokens.
|
||||
uint32_t ui32ShaderLength;
|
||||
|
||||
|
||||
//Instruction* functions;//non-main subroutines
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncTableToFuncPointer; // dynamic alloc?
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncBodyToFuncTable;
|
||||
|
||||
struct FuncTableEntry
|
||||
{
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncBodies;
|
||||
};
|
||||
HLSLcc::growing_vector<FuncTableEntry> funcTable;
|
||||
|
||||
struct FuncPointerEntry
|
||||
{
|
||||
HLSLcc::growing_vector<uint32_t> aui32FuncTables;
|
||||
uint32_t ui32NumBodiesPerTable;
|
||||
};
|
||||
|
||||
HLSLcc::growing_vector<FuncPointerEntry> funcPointer;
|
||||
|
||||
HLSLcc::growing_vector<uint32_t> ui32NextClassFuncName;
|
||||
|
||||
const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream.
|
||||
|
||||
std::vector<ShaderPhase> asPhases;
|
||||
|
||||
ShaderInfo sInfo;
|
||||
|
||||
// There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex.
|
||||
// Which one is used depends on the context:
|
||||
// per-vertex space is used in vertex/pixel/geom shaders always
|
||||
// hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT)
|
||||
// domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT
|
||||
|
||||
// Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch
|
||||
// Note that these ints are component masks
|
||||
HLSLcc::growing_vector<int> abScalarInput[2];
|
||||
HLSLcc::growing_vector<int> abScalarOutput[2];
|
||||
|
||||
HLSLcc::growing_vector<int> aIndexedInput[2];
|
||||
HLSLcc::growing_vector<bool> aIndexedOutput[2];
|
||||
|
||||
HLSLcc::growing_vector<int> aIndexedInputParents[2];
|
||||
|
||||
HLSLcc::growing_vector<RESOURCE_DIMENSION> aeResourceDims;
|
||||
|
||||
HLSLcc::growing_vector<char> acInputDeclared[2];
|
||||
HLSLcc::growing_vector<char> acOutputDeclared[2];
|
||||
|
||||
std::vector<int> aiOpcodeUsed; // Initialized to NUM_OPCODES elements above.
|
||||
|
||||
uint32_t ui32CurrentVertexOutputStream;
|
||||
|
||||
TextureSamplerPairs textureSamplers;
|
||||
|
||||
std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
|
||||
std::vector<char> psInt16TempSizes; // min16ints
|
||||
std::vector<char> psInt12TempSizes; // min12ints
|
||||
std::vector<char> psUIntTempSizes; // Same for uints
|
||||
std::vector<char> psUInt16TempSizes; // ... and for uint16's
|
||||
std::vector<char> psFloatTempSizes; // ...and for floats
|
||||
std::vector<char> psFloat16TempSizes; // ...and for min16floats
|
||||
std::vector<char> psFloat10TempSizes; // ...and for min10floats
|
||||
std::vector<char> psDoubleTempSizes; // ...and for doubles
|
||||
std::vector<char> psBoolTempSizes; // ... and for bools
|
||||
|
||||
bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that.
|
||||
uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis
|
||||
|
||||
private:
|
||||
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);
|
||||
};
|
32
third_party/HLSLcc/src/internal_includes/Translator.h
vendored
Normal file
32
third_party/HLSLcc/src/internal_includes/Translator.h
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
#include "HLSLCrossCompilerContext.h"
|
||||
#include "Shader.h"
|
||||
|
||||
struct Declaration;
|
||||
// Base class for translator backend implenentations.
|
||||
class Translator
|
||||
{
|
||||
protected:
|
||||
HLSLCrossCompilerContext *psContext;
|
||||
public:
|
||||
explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {}
|
||||
virtual ~Translator() {}
|
||||
|
||||
virtual bool Translate() = 0;
|
||||
|
||||
virtual void TranslateDeclaration(const Declaration *psDecl) = 0;
|
||||
|
||||
// Translate system value type to name, return true if succeeded and no further translation is necessary
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0;
|
||||
|
||||
// In GLSL, the input and output names cannot clash.
|
||||
// Also, the output name of previous stage must match the input name of the next stage.
|
||||
// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program.
|
||||
//
|
||||
virtual void SetIOPrefixes() = 0;
|
||||
|
||||
void SetExtensions(const struct GlExtensions *ext)
|
||||
{
|
||||
psContext->psShader->extensions = ext;
|
||||
}
|
||||
};
|
138
third_party/HLSLcc/src/internal_includes/UseDefineChains.h
vendored
Normal file
138
third_party/HLSLcc/src/internal_includes/UseDefineChains.h
vendored
Normal file
@ -0,0 +1,138 @@
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
struct DefineUseChainEntry;
|
||||
struct UseDefineChainEntry;
|
||||
|
||||
typedef std::set<DefineUseChainEntry *> DefineSet;
|
||||
typedef std::set<UseDefineChainEntry *> UsageSet;
|
||||
|
||||
struct Instruction;
|
||||
class Operand;
|
||||
class ShaderInfo;
|
||||
namespace HLSLcc
|
||||
{
|
||||
namespace ControlFlow
|
||||
{
|
||||
class ControlFlowGraph;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Def-Use chain per temp component
|
||||
struct DefineUseChainEntry
|
||||
{
|
||||
DefineUseChainEntry()
|
||||
: psInst(0)
|
||||
, psOp(0)
|
||||
, usages()
|
||||
, writeMask(0)
|
||||
, index(0)
|
||||
, isStandalone(0)
|
||||
{
|
||||
memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *));
|
||||
}
|
||||
|
||||
Instruction *psInst; // The declaration (write to this temp component)
|
||||
Operand *psOp; // The operand within this instruction for the write target
|
||||
UsageSet usages; // List of usages that are dependent on this write
|
||||
uint32_t writeMask; // Access mask; which all components were written to in the same op
|
||||
uint32_t index; // For which component was this definition created for?
|
||||
uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings
|
||||
struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components.
|
||||
|
||||
#if _DEBUG
|
||||
bool operator==(const DefineUseChainEntry &a) const
|
||||
{
|
||||
if (psInst != a.psInst)
|
||||
return false;
|
||||
if (psOp != a.psOp)
|
||||
return false;
|
||||
if (writeMask != a.writeMask)
|
||||
return false;
|
||||
if (index != a.index)
|
||||
return false;
|
||||
if (isStandalone != a.isStandalone)
|
||||
return false;
|
||||
|
||||
// Just check that each one has the same amount of usages
|
||||
if (usages.size() != a.usages.size())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef std::list<DefineUseChainEntry> DefineUseChain;
|
||||
|
||||
struct UseDefineChainEntry
|
||||
{
|
||||
UseDefineChainEntry()
|
||||
: psInst(0)
|
||||
, psOp(0)
|
||||
, defines()
|
||||
, accessMask(0)
|
||||
, index(0)
|
||||
{
|
||||
memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *));
|
||||
}
|
||||
|
||||
Instruction *psInst; // The use (read from this temp component)
|
||||
Operand *psOp; // The operand within this instruction for the read
|
||||
DefineSet defines; // List of writes that are visible to this read
|
||||
uint32_t accessMask; // Which all components were read together with this one
|
||||
uint32_t index; // For which component was this usage created for?
|
||||
struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components.
|
||||
|
||||
#if _DEBUG
|
||||
bool operator==(const UseDefineChainEntry &a) const
|
||||
{
|
||||
if (psInst != a.psInst)
|
||||
return false;
|
||||
if (psOp != a.psOp)
|
||||
return false;
|
||||
if (accessMask != a.accessMask)
|
||||
return false;
|
||||
if (index != a.index)
|
||||
return false;
|
||||
|
||||
// Just check that each one has the same amount of usages
|
||||
if (defines.size() != a.defines.size())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef std::list<UseDefineChainEntry> UseDefineChain;
|
||||
|
||||
typedef std::map<uint32_t, UseDefineChain> UseDefineChains;
|
||||
typedef std::map<uint32_t, DefineUseChain> DefineUseChains;
|
||||
typedef std::vector<DefineUseChainEntry *> ActiveDefinitions;
|
||||
|
||||
// Do flow control analysis on the instructions and build the define-use and use-define chains
|
||||
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg);
|
||||
|
||||
// Do temp splitting based on use-define chains
|
||||
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable);
|
||||
|
||||
// Based on the sampler precisions, downgrade the definitions if possible.
|
||||
void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps);
|
||||
|
||||
// Optimization pass for successive passes: Mark Operand->isStandalone for definitions that are "standalone": all usages (and all their siblings) of this and all its siblings only see this definition.
|
||||
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps);
|
||||
|
||||
// Write the uses and defines back to Instruction and Operand member lists.
|
||||
void WriteBackUsesAndDefines(DefineUseChains &psDUChains);
|
21
third_party/HLSLcc/src/internal_includes/debug.h
vendored
Normal file
21
third_party/HLSLcc/src/internal_includes/debug.h
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#ifdef _DEBUG
|
||||
#include "assert.h"
|
||||
#define ASSERT(expr) CustomAssert(expr)
|
||||
// Backs the ASSERT(expr) macro in debug builds: trips assert(0) when the
// condition does not hold.
//
// The parameter is bool rather than int so the argument is tested for truth
// instead of being narrowed first: with int, a 64-bit value whose low 32 bits
// are zero was converted to 0 and asserted falsely, and pointer arguments did
// not compile at all.
static inline void CustomAssert(bool expression)
{
    if (!expression)
    {
        assert(0);
    }
}
|
||||
|
||||
#else
|
||||
#define UNUSED(EXPR_) \
|
||||
do { if (false) (void)(EXPR_); } while(0)
|
||||
#define ASSERT(expr) UNUSED(expr)
|
||||
#endif
|
||||
|
||||
#endif
|
10
third_party/HLSLcc/src/internal_includes/decode.h
vendored
Normal file
10
third_party/HLSLcc/src/internal_includes/decode.h
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef DECODE_H
|
||||
#define DECODE_H
|
||||
|
||||
#include "internal_includes/Shader.h"
|
||||
|
||||
Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags);
|
||||
|
||||
void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst);
|
||||
|
||||
#endif
|
328
third_party/HLSLcc/src/internal_includes/languages.h
vendored
Normal file
328
third_party/HLSLcc/src/internal_includes/languages.h
vendored
Normal file
@ -0,0 +1,328 @@
|
||||
#ifndef LANGUAGES_H
|
||||
#define LANGUAGES_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
#include "HLSLCrossCompilerContext.h"
|
||||
#include "Shader.h"
|
||||
|
||||
static int InOutSupported(const GLLang eLang)
|
||||
{
|
||||
if (eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int WriteToFragData(const GLLang eLang)
|
||||
{
|
||||
if (eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ShaderBitEncodingSupported(const GLLang eLang)
|
||||
{
|
||||
if (eLang != LANG_ES_300 &&
|
||||
eLang != LANG_ES_310 &&
|
||||
eLang < LANG_330)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int HaveOverloadedTextureFuncs(const GLLang eLang)
|
||||
{
|
||||
if (eLang == LANG_ES_100 || eLang == LANG_120)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext)
|
||||
{
|
||||
if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//Only enable for ES. Vulkan and Switch.
|
||||
//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan.
|
||||
static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext)
|
||||
{
|
||||
if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0)
|
||||
return 1;
|
||||
|
||||
const GLLang eLang = psContext->psShader->eTargetLanguage;
|
||||
if (eLang >= LANG_ES_100 && eLang <= LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int EmitLowp(const HLSLCrossCompilerContext *psContext)
|
||||
{
|
||||
const GLLang eLang = psContext->psShader->eTargetLanguage;
|
||||
return eLang == LANG_ES_100 ? 1 : 0;
|
||||
}
|
||||
|
||||
static int HaveCubemapArray(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400 && eLang <= LANG_GL_LAST)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool IsESLanguage(const GLLang eLang)
|
||||
{
|
||||
return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST);
|
||||
}
|
||||
|
||||
static bool IsDesktopGLLanguage(const GLLang eLang)
|
||||
{
|
||||
return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST);
|
||||
}
|
||||
|
||||
//Only on vertex inputs and pixel outputs.
|
||||
static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions)
|
||||
{
|
||||
if (eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveInOutLocationQualifier(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_410 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//layout(binding = X) uniform {uniformA; uniformB;}
|
||||
//layout(location = X) uniform uniform_name;
|
||||
static int HaveUniformBindingsAndLocations(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags)
|
||||
{
|
||||
if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)
|
||||
return 0;
|
||||
|
||||
if (eLang >= LANG_430 || eLang == LANG_ES_310 ||
|
||||
(extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int DualSourceBlendSupported(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_330)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int SubroutinesSupported(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//Before 430, flat/smooth/centroid/noperspective must match
|
||||
//between fragment and its previous stage.
|
||||
//HLSL bytecode only tells us the interpolation in pixel shader.
|
||||
static int PixelInterpDependency(const GLLang eLang)
|
||||
{
|
||||
if (eLang < LANG_430)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveUnsignedTypes(const GLLang eLang)
|
||||
{
|
||||
switch (eLang)
|
||||
{
|
||||
case LANG_ES_100:
|
||||
case LANG_120:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int HaveBitEncodingOps(const GLLang eLang)
|
||||
{
|
||||
switch (eLang)
|
||||
{
|
||||
case LANG_ES_100:
|
||||
case LANG_120:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int HaveNativeBitwiseOps(const GLLang eLang)
|
||||
{
|
||||
switch (eLang)
|
||||
{
|
||||
case LANG_ES_100:
|
||||
case LANG_120:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// WebGL only allows dynamic indexing with constant expressions, loop indices or a combination.
// The only exception is for uniform access in vertex shaders, which can be indexed using any
// expression.
//
// Returns 1 for every target language other than LANG_ES_100 / LANG_120. For those two it
// returns 1 only when an operand is supplied and either its m_ForLoopInductorName is set or
// it is a constant-buffer operand in a vertex shader; otherwise 0.
static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL)
{
    const GLLang eLang = psContext->psShader->eTargetLanguage;
    if (eLang != LANG_ES_100 && eLang != LANG_120)
        return 1;

    // Restricted targets: without an operand to inspect there is nothing to allow.
    if (psOperand == NULL)
        return 0;

    if (psOperand->m_ForLoopInductorName)
        return 1;

    const bool isVertexShader = psContext->psShader->eShaderType == VERTEX_SHADER;
    if (isVertexShader && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
        return 1;

    return 0;
}
|
||||
|
||||
static int HaveGather(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveGatherNonConstOffset(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveQueryLod(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveQueryLevels(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_430)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveFragmentCoordConventions(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_150)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveGeometryShaderARB(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_150)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveAtomicCounter(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveAtomicMem(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_430 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveImageAtomics(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_420)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveCompute(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_430 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HaveImageLoadStore(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_420 || eLang == LANG_ES_310)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int HavePreciseQualifier(const GLLang eLang)
|
||||
{
|
||||
if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
26
third_party/HLSLcc/src/internal_includes/reflect.h
vendored
Normal file
26
third_party/HLSLcc/src/internal_includes/reflect.h
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef REFLECT_H
|
||||
#define REFLECT_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
struct ShaderPhase_TAG;
|
||||
|
||||
// Bundle of chunk pointers passed to LoadShaderInfo, one pointer per chunk kind.
// NOTE(review): presumably a null pointer means the chunk is absent -- confirm in reflect.cpp.
typedef struct
{
    uint32_t* pui32Inputs;              // inputs
    uint32_t* pui32Outputs;             // outputs
    uint32_t* pui32Resources;           // resources
    uint32_t* pui32Interfaces;          // interfaces
    uint32_t* pui32Inputs11;            // inputs, "11" variant
    uint32_t* pui32Outputs11;           // outputs, "11" variant
    uint32_t* pui32OutputsWithStreams;  // outputs with streams
    uint32_t* pui32PatchConstants;      // patch constants
    uint32_t* pui32PatchConstants11;    // patch constants, "11" variant
} ReflectionChunks;
|
||||
|
||||
void LoadShaderInfo(const uint32_t ui32MajorVersion,
|
||||
const uint32_t ui32MinorVersion,
|
||||
const ReflectionChunks* psChunks,
|
||||
ShaderInfo* psInfo, uint32_t decodeFlags);
|
||||
|
||||
#endif
|
244
third_party/HLSLcc/src/internal_includes/toGLSL.h
vendored
Normal file
244
third_party/HLSLcc/src/internal_includes/toGLSL.h
vendored
Normal file
@ -0,0 +1,244 @@
|
||||
#pragma once
|
||||
|
||||
#include "hlslcc.h"
|
||||
#include "internal_includes/Translator.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
|
||||
class ToGLSL : public Translator
|
||||
{
|
||||
protected:
|
||||
GLLang language;
|
||||
bool m_NeedUnityInstancingArraySizeDecl;
|
||||
bool m_NeedUnityPreTransformDecl;
|
||||
|
||||
public:
|
||||
// Binds the translator to ctx. The language starts as LANG_DEFAULT, both
// "need declaration" flags start false and the while-true loop counter starts at 0.
explicit ToGLSL(HLSLCrossCompilerContext* ctx)
    : Translator(ctx)
    , language(LANG_DEFAULT)
    , m_NeedUnityInstancingArraySizeDecl(false)
    , m_NeedUnityPreTransformDecl(false)
    , m_NumDeclaredWhileTrueLoops(0)
{
}
|
||||
// Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language
|
||||
GLLang SetLanguage(GLLang suggestedLanguage);
|
||||
|
||||
virtual bool Translate();
|
||||
virtual void TranslateDeclaration(const Declaration* psDecl);
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL);
|
||||
virtual void SetIOPrefixes();
|
||||
|
||||
private:
|
||||
void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false);
|
||||
void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false);
|
||||
void TranslateInstruction(Instruction* psInst, bool isEmbedded = false);
|
||||
|
||||
void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false);
|
||||
void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false);
|
||||
|
||||
void TranslateOperandIndex(const Operand* psOperand, int index);
|
||||
void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add);
|
||||
|
||||
void AddOpAssignToDestWithMask(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask);
|
||||
void AddAssignToDest(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis);
|
||||
void AddAssignPrologue(int numParenthesis, bool isEmbedded = false);
|
||||
|
||||
|
||||
void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName);
|
||||
void AddBuiltinInput(const Declaration* psDecl, const char* builtinName);
|
||||
void HandleOutputRedirect(const Declaration *psDecl, const char *Precision);
|
||||
void HandleInputRedirect(const Declaration *psDecl, const char *Precision);
|
||||
|
||||
void AddUserOutput(const Declaration* psDecl);
|
||||
void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl);
|
||||
void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection);
|
||||
void PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType);
|
||||
void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl);
|
||||
|
||||
void ReportStruct(const std::string &name, const struct ShaderVarType* psType);
|
||||
|
||||
// Comparison selector passed to AddComparison.
enum ComparisonType
{
    CMP_EQ, // 0
    CMP_LT, // 1
    CMP_GE, // 2
    CMP_NE, // 3
};
|
||||
|
||||
void AddComparison(Instruction* psInst, ComparisonType eType,
|
||||
uint32_t typeFlag);
|
||||
|
||||
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded = false);
|
||||
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise);
|
||||
void CallBinaryOp(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false);
|
||||
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, uint32_t dataType);
|
||||
void CallHelper3(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2Int(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2UInt(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1Int(
|
||||
const char* name,
|
||||
Instruction* psInst,
|
||||
const int dest,
|
||||
const int src0,
|
||||
int paramsShouldFollowWriteMask);
|
||||
void TranslateTexelFetch(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexCoord(
|
||||
const RESOURCE_DIMENSION eResDim,
|
||||
Operand* psTexCoordOperand);
|
||||
void GetResInfoData(Instruction* psInst, int index, int destElem);
|
||||
void TranslateTextureSample(Instruction* psInst,
|
||||
uint32_t ui32Flags);
|
||||
void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
|
||||
const Operand* psByteAddr, uint32_t offset, uint32_t mask);
|
||||
void TranslateShaderStorageStore(Instruction* psInst);
|
||||
void TranslateShaderStorageLoad(Instruction* psInst);
|
||||
void TranslateAtomicMemOp(Instruction* psInst);
|
||||
void TranslateConditional(
|
||||
Instruction* psInst,
|
||||
bstring glsl);
|
||||
|
||||
void HandleSwitchTransformation(Instruction* psInst, bstring glsl);
|
||||
|
||||
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
|
||||
bool DeclareExtraFunction(const std::string &name, bstring body);
|
||||
void UseExtraFunctionDependency(const std::string &name);
|
||||
|
||||
void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType);
|
||||
void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements);
|
||||
|
||||
bool RenderTargetDeclared(uint32_t input);
|
||||
|
||||
std::string GetVulkanDummySamplerName();
|
||||
|
||||
// A <function name, body text> map of extra helper functions we'll need.
|
||||
FunctionDefinitions m_FunctionDefinitions;
|
||||
std::vector<std::string> m_FunctionDefinitionsOrder;
|
||||
|
||||
std::vector<std::string> m_AdditionalDefinitions;
|
||||
|
||||
std::vector<std::string> m_DefinedStructs;
|
||||
|
||||
std::set<uint32_t> m_DeclaredRenderTarget;
|
||||
int m_NumDeclaredWhileTrueLoops;
|
||||
|
||||
struct SwitchConversion
|
||||
{
|
||||
/*
|
||||
IF (CONDITION1) BREAK; STATEMENT1; IF (CONDITION2) BREAK; STATEMENT2;... transforms to
|
||||
if (CONDITION1) {} ELSE { STATEMENT1; IF (CONDITION2) {} ELSE {STATEMENT2; ...} }
|
||||
thus, we need to count the "BREAK" statements we encountered in each IF on the same level inside a SWITCH.
|
||||
*/
|
||||
struct ConditionalInfo
|
||||
{
|
||||
int breakCount; // Count BREAK on the same level to emit enough closing braces afterwards
|
||||
bool breakEncountered; // Just encountered a BREAK statment, potentially need to emit "ELSE"
|
||||
bool endifEncountered; // We need to check for "ENDIF ELSE" sequence, and not emit "else" if we see it
|
||||
|
||||
ConditionalInfo() :
|
||||
ConditionalInfo(0, false)
|
||||
{}
|
||||
|
||||
explicit ConditionalInfo(int initialBreakCount) :
|
||||
ConditionalInfo(initialBreakCount, false)
|
||||
{}
|
||||
|
||||
ConditionalInfo(int initialBreakCount, bool withEndif) :
|
||||
ConditionalInfo(initialBreakCount, withEndif, false)
|
||||
{}
|
||||
|
||||
ConditionalInfo(int initialBreakCount, bool withEndif, bool withBreak) :
|
||||
breakCount(initialBreakCount),
|
||||
endifEncountered(withEndif),
|
||||
breakEncountered(withBreak)
|
||||
{}
|
||||
};
|
||||
|
||||
bstring switchOperand;
|
||||
// We defer emitting if (condition) for each CASE statement to concatenate possible CASE A: CASE B:... into one if ().
|
||||
std::vector<bstring> currentCaseOperands;
|
||||
std::vector<ConditionalInfo> conditionalsInfo;
|
||||
int isInLoop; // We don't count "BREAK" (end emit them) if we're in a loop.
|
||||
bool isFirstCase;
|
||||
|
||||
SwitchConversion() :
|
||||
switchOperand(bfromcstr("")),
|
||||
isInLoop(0),
|
||||
isFirstCase(true)
|
||||
{}
|
||||
|
||||
SwitchConversion(const SwitchConversion& other) :
|
||||
switchOperand(bstrcpy(other.switchOperand)),
|
||||
conditionalsInfo(other.conditionalsInfo),
|
||||
isInLoop(other.isInLoop),
|
||||
isFirstCase(other.isFirstCase)
|
||||
{
|
||||
currentCaseOperands.reserve(other.currentCaseOperands.size());
|
||||
for (size_t i = 0; i < other.currentCaseOperands.size(); ++i)
|
||||
currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i]));
|
||||
}
|
||||
|
||||
SwitchConversion(SwitchConversion&& other) :
|
||||
switchOperand(other.switchOperand),
|
||||
currentCaseOperands(std::move(other.currentCaseOperands)),
|
||||
conditionalsInfo(std::move(other.conditionalsInfo)),
|
||||
isInLoop(other.isInLoop),
|
||||
isFirstCase(other.isFirstCase)
|
||||
{
|
||||
other.switchOperand = nullptr;
|
||||
}
|
||||
|
||||
~SwitchConversion()
|
||||
{
|
||||
bdestroy(switchOperand);
|
||||
for (size_t i = 0; i < currentCaseOperands.size(); ++i)
|
||||
bdestroy(currentCaseOperands[i]);
|
||||
}
|
||||
|
||||
SwitchConversion& operator=(const SwitchConversion& other)
|
||||
{
|
||||
if (this == &other)
|
||||
return *this;
|
||||
|
||||
switchOperand = bstrcpy(other.switchOperand);
|
||||
conditionalsInfo = other.conditionalsInfo;
|
||||
isInLoop = other.isInLoop;
|
||||
isFirstCase = other.isFirstCase;
|
||||
currentCaseOperands.reserve(other.currentCaseOperands.size());
|
||||
for (size_t i = 0; i < other.currentCaseOperands.size(); ++i)
|
||||
currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i]));
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
SwitchConversion& operator=(SwitchConversion&& other)
|
||||
{
|
||||
if (this == &other)
|
||||
return *this;
|
||||
|
||||
switchOperand = other.switchOperand;
|
||||
conditionalsInfo = std::move(other.conditionalsInfo);
|
||||
isInLoop = other.isInLoop;
|
||||
isFirstCase = other.isFirstCase;
|
||||
currentCaseOperands = std::move(other.currentCaseOperands);
|
||||
|
||||
other.switchOperand = nullptr;
|
||||
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
std::vector<SwitchConversion> m_SwitchStack;
|
||||
};
|
26
third_party/HLSLcc/src/internal_includes/toGLSLOperand.h
vendored
Normal file
26
third_party/HLSLcc/src/internal_includes/toGLSLOperand.h
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
#ifndef TO_GLSL_OPERAND_H
|
||||
#define TO_GLSL_OPERAND_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "bstrlib.h"
|
||||
#include "ShaderInfo.h"
|
||||
|
||||
class HLSLCrossCompilerContext;
|
||||
|
||||
//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag);
|
||||
// Translate operand but add additional component mask
|
||||
//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask);
|
||||
|
||||
void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase);
|
||||
void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase);
|
||||
void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase);
|
||||
|
||||
void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
|
||||
std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare);
|
||||
|
||||
std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
|
||||
void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare);
|
||||
|
||||
std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name);
|
||||
|
||||
#endif
|
182
third_party/HLSLcc/src/internal_includes/toMetal.h
vendored
Normal file
182
third_party/HLSLcc/src/internal_includes/toMetal.h
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
#pragma once
|
||||
#include "internal_includes/Translator.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
// Plain record describing one sampler: its name plus a register/slot pair.
struct SamplerDesc
{
    std::string name;
    uint32_t reg;
    uint32_t slot;
};
|
||||
struct TextureSamplerDesc
|
||||
{
|
||||
std::string name;
|
||||
int textureBind, samplerBind;
|
||||
HLSLCC_TEX_DIMENSION dim;
|
||||
bool isMultisampled;
|
||||
bool isDepthSampler;
|
||||
bool uav;
|
||||
};
|
||||
|
||||
class ToMetal : public Translator
|
||||
{
|
||||
public:
|
||||
explicit ToMetal(HLSLCrossCompilerContext *ctx)
|
||||
: Translator(ctx)
|
||||
, m_ShadowSamplerDeclared(false)
|
||||
, m_NeedFBOutputRemapDecl(false)
|
||||
, m_NeedFBInputRemapDecl(false)
|
||||
{}
|
||||
|
||||
virtual bool Translate();
|
||||
virtual void TranslateDeclaration(const Declaration *psDecl);
|
||||
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL);
|
||||
std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL);
|
||||
|
||||
virtual void SetIOPrefixes();
|
||||
|
||||
private:
|
||||
void TranslateInstruction(Instruction* psInst);
|
||||
|
||||
void DeclareBuiltinInput(const Declaration *psDecl);
|
||||
void DeclareBuiltinOutput(const Declaration *psDecl);
|
||||
void DeclareClipPlanes(const Declaration* decl, unsigned declCount);
|
||||
void GenerateTexturesReflection(HLSLccReflection* refl);
|
||||
|
||||
// Retrieve the name of the output struct for this shader
|
||||
std::string GetOutputStructName() const;
|
||||
std::string GetInputStructName() const;
|
||||
std::string GetCBName(const std::string& cbName) const;
|
||||
|
||||
void DeclareHullShaderPassthrough();
|
||||
void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName);
|
||||
void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName);
|
||||
|
||||
void DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint);
|
||||
void DeclareStructType(const std::string &name, const std::vector<ShaderVar> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false);
|
||||
void DeclareStructType(const std::string &name, const std::vector<ShaderVarType> &contents, bool withinCB = false, uint32_t cumulativeOffset = 0);
|
||||
void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true);
|
||||
void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true);
|
||||
void DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV);
|
||||
|
||||
void DeclareResource(const Declaration *psDecl);
|
||||
void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim);
|
||||
|
||||
void DeclareOutput(const Declaration *decl);
|
||||
|
||||
void PrintStructDeclarations(StructDefinitions &defs, const char *name = "");
|
||||
|
||||
std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber);
|
||||
|
||||
// ToMetalOperand.cpp
|
||||
std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true);
|
||||
std::string TranslateOperandIndex(const Operand* psOperand, int index);
|
||||
std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase);
|
||||
|
||||
// ToMetalInstruction.cpp
|
||||
|
||||
void AddOpAssignToDestWithMask(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask);
|
||||
void AddAssignToDest(const Operand* psDest,
|
||||
SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis);
|
||||
void AddAssignPrologue(int numParenthesis);
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CMP_EQ,
|
||||
CMP_LT,
|
||||
CMP_GE,
|
||||
CMP_NE,
|
||||
} ComparisonType;
|
||||
|
||||
void AddComparison(Instruction* psInst, ComparisonType eType,
|
||||
uint32_t typeFlag);
|
||||
|
||||
bool CanForceToHalfOperand(const Operand *psOperand);
|
||||
|
||||
void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise);
|
||||
void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise);
|
||||
void CallBinaryOp(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType);
|
||||
void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, uint32_t dataType);
|
||||
void CallHelper3(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags);
|
||||
void CallHelper3(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2Int(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper2UInt(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int src1, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1(const char* name, Instruction* psInst,
|
||||
int dest, int src0, int paramsShouldFollowWriteMask);
|
||||
void CallHelper1Int(
|
||||
const char* name,
|
||||
Instruction* psInst,
|
||||
const int dest,
|
||||
const int src0,
|
||||
int paramsShouldFollowWriteMask);
|
||||
void TranslateTexelFetch(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexelFetchOffset(
|
||||
Instruction* psInst,
|
||||
const ResourceBinding* psBinding,
|
||||
bstring glsl);
|
||||
void TranslateTexCoord(
|
||||
const RESOURCE_DIMENSION eResDim,
|
||||
Operand* psTexCoordOperand);
|
||||
void GetResInfoData(Instruction* psInst, int index, int destElem);
|
||||
void TranslateTextureSample(Instruction* psInst,
|
||||
uint32_t ui32Flags);
|
||||
void TranslateDynamicComponentSelection(const ShaderVarType* psVarType,
|
||||
const Operand* psByteAddr, uint32_t offset, uint32_t mask);
|
||||
void TranslateShaderStorageStore(Instruction* psInst);
|
||||
void TranslateShaderStorageLoad(Instruction* psInst);
|
||||
void TranslateAtomicMemOp(Instruction* psInst);
|
||||
void TranslateConditional(
|
||||
Instruction* psInst,
|
||||
bstring glsl);
|
||||
|
||||
// The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters
|
||||
StructDefinitions m_StructDefinitions;
|
||||
|
||||
// A <function name, body text> map of extra helper functions we'll need.
|
||||
FunctionDefinitions m_FunctionDefinitions;
|
||||
|
||||
BindingSlotAllocator m_TextureSlots, m_SamplerSlots;
|
||||
BindingSlotAllocator m_BufferSlots;
|
||||
|
||||
struct BufferReflection
|
||||
{
|
||||
uint32_t bind;
|
||||
bool isUAV;
|
||||
bool hasCounter;
|
||||
};
|
||||
std::map<std::string, BufferReflection> m_BufferReflections;
|
||||
|
||||
std::vector<SamplerDesc> m_Samplers;
|
||||
std::vector<TextureSamplerDesc> m_Textures;
|
||||
|
||||
std::string m_ExtraGlobalDefinitions;
|
||||
|
||||
// Flags for whether we need to add the declaration for the FB IO remaps
|
||||
bool m_NeedFBInputRemapDecl;
|
||||
bool m_NeedFBOutputRemapDecl;
|
||||
|
||||
bool m_ShadowSamplerDeclared;
|
||||
|
||||
void EnsureShadowSamplerDeclared();
|
||||
|
||||
// Add an extra function to the m_FunctionDefinitions list, unless it's already there.
|
||||
void DeclareExtraFunction(const std::string &name, const std::string &body);
|
||||
|
||||
// Move all lowp -> mediump
|
||||
void ClampPartialPrecisions();
|
||||
|
||||
// Reseve UAV slots in advance to match the original HLSL bindings -> correct bindings in SetRandomWriteTarget()
|
||||
void ReserveUAVBindingSlots(ShaderPhase *phase);
|
||||
};
|
3
third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h
vendored
Normal file
3
third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
#include "internal_includes/Declaration.h"
|
789
third_party/HLSLcc/src/internal_includes/tokens.h
vendored
Normal file
789
third_party/HLSLcc/src/internal_includes/tokens.h
vendored
Normal file
@ -0,0 +1,789 @@
|
||||
#ifndef TOKENS_H
|
||||
#define TOKENS_H
|
||||
|
||||
#include "hlslcc.h"
|
||||
|
||||
// Identifies a phase of a shader program. Everything except hull shaders uses MAIN_PHASE only;
// the HS_* values name the hull-shader sub-phases.
enum SHADER_PHASE_TYPE
{
    SHADER_PHASE_INVALID = -1,

    MAIN_PHASE           = 0,
    HS_GLOBAL_DECL_PHASE = 1,
    HS_CTRL_POINT_PHASE  = 2,
    HS_FORK_PHASE        = 3,
    HS_JOIN_PHASE        = 4
};
|
||||
|
||||
// Returns the shader type stored in the upper 16 bits of the token.
static SHADER_TYPE DecodeShaderType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token >> 16;
    return static_cast<SHADER_TYPE>(typeBits);
}
|
||||
|
||||
// Returns the program major version stored in bits [7:4] of the token.
static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xfu;
}
|
||||
|
||||
// Returns the program minor version stored in bits [3:0] of the token.
static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token)
{
    const uint32_t kMinorMask = 0xfu;
    return ui32Token & kMinorMask;
}
|
||||
|
||||
// Returns the instruction length field stored in bits [30:24] of the opcode token.
static uint32_t DecodeInstructionLength(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 24;
    return shifted & 0x7fu;
}
|
||||
|
||||
// Returns 1 when bit 31 of the opcode token (the "extended" flag) is set, otherwise 0.
static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token)
{
    return ui32Token >> 31;
}
|
||||
|
||||
// Kinds of extended opcode token.
typedef enum EXTENDED_OPCODE_TYPE
{
    EXTENDED_OPCODE_EMPTY                = 0,
    EXTENDED_OPCODE_SAMPLE_CONTROLS      = 1,
    EXTENDED_OPCODE_RESOURCE_DIM         = 2,
    EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3,
} EXTENDED_OPCODE_TYPE;

// Returns the extended opcode type stored in bits [5:0] of an extended opcode token.
static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token & 0x3fu;
    return static_cast<EXTENDED_OPCODE_TYPE>(typeBits);
}
|
||||
|
||||
// Returns the 4-bit resource return type for component ui32Coord; component N occupies bits [4N+3:4N].
static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4;
    const uint32_t typeBits = (ui32Token >> shift) & 0xF;
    return static_cast<RESOURCE_RETURN_TYPE>(typeBits);
}
|
||||
|
||||
// Same as DecodeResourceReturnType, but for an extended opcode token where the per-component
// fields start at bit 6: component N occupies bits [4N+9:4N+6].
static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token)
{
    const uint32_t shift = ui32Coord * 4 + 6;
    const uint32_t typeBits = (ui32Token >> shift) & 0xF;
    return static_cast<RESOURCE_RETURN_TYPE>(typeBits);
}
|
||||
|
||||
// Instruction opcodes. The non-negative values are implicit and therefore depend on the exact
// enumerator order below: do not insert, remove or reorder entries.
enum OPCODE_TYPE
{
    // For DX9
    OPCODE_POW = -6,
    OPCODE_DP2ADD = -5,
    OPCODE_LRP = -4,
    OPCODE_ENDREP = -3,
    OPCODE_REP = -2,
    OPCODE_SPECIAL_DCL_IMMCONST = -1,

    OPCODE_ADD = 0,
    OPCODE_AND,
    OPCODE_BREAK,
    OPCODE_BREAKC,
    OPCODE_CALL,
    OPCODE_CALLC,
    OPCODE_CASE,
    OPCODE_CONTINUE,
    OPCODE_CONTINUEC,
    OPCODE_CUT,
    OPCODE_DEFAULT,
    OPCODE_DERIV_RTX,
    OPCODE_DERIV_RTY,
    OPCODE_DISCARD,
    OPCODE_DIV,
    OPCODE_DP2,
    OPCODE_DP3,
    OPCODE_DP4,
    OPCODE_ELSE,
    OPCODE_EMIT,
    OPCODE_EMITTHENCUT,
    OPCODE_ENDIF,
    OPCODE_ENDLOOP,
    OPCODE_ENDSWITCH,
    OPCODE_EQ,
    OPCODE_EXP,
    OPCODE_FRC,
    OPCODE_FTOI,
    OPCODE_FTOU,
    OPCODE_GE,
    OPCODE_IADD,
    OPCODE_IF,
    OPCODE_IEQ,
    OPCODE_IGE,
    OPCODE_ILT,
    OPCODE_IMAD,
    OPCODE_IMAX,
    OPCODE_IMIN,
    OPCODE_IMUL,
    OPCODE_INE,
    OPCODE_INEG,
    OPCODE_ISHL,
    OPCODE_ISHR,
    OPCODE_ITOF,
    OPCODE_LABEL,
    OPCODE_LD,
    OPCODE_LD_MS,
    OPCODE_LOG,
    OPCODE_LOOP,
    OPCODE_LT,
    OPCODE_MAD,
    OPCODE_MIN,
    OPCODE_MAX,
    OPCODE_CUSTOMDATA,
    OPCODE_MOV,
    OPCODE_MOVC,
    OPCODE_MUL,
    OPCODE_NE,
    OPCODE_NOP,
    OPCODE_NOT,
    OPCODE_OR,
    OPCODE_RESINFO,
    OPCODE_RET,
    OPCODE_RETC,
    OPCODE_ROUND_NE,
    OPCODE_ROUND_NI,
    OPCODE_ROUND_PI,
    OPCODE_ROUND_Z,
    OPCODE_RSQ,
    OPCODE_SAMPLE,
    OPCODE_SAMPLE_C,
    OPCODE_SAMPLE_C_LZ,
    OPCODE_SAMPLE_L,
    OPCODE_SAMPLE_D,
    OPCODE_SAMPLE_B,
    OPCODE_SQRT,
    OPCODE_SWITCH,
    OPCODE_SINCOS,
    OPCODE_UDIV,
    OPCODE_ULT,
    OPCODE_UGE,
    OPCODE_UMUL,
    OPCODE_UMAD,
    OPCODE_UMAX,
    OPCODE_UMIN,
    OPCODE_USHR,
    OPCODE_UTOF,
    OPCODE_XOR,
    OPCODE_DCL_RESOURCE,                // DCL* opcodes have
    OPCODE_DCL_CONSTANT_BUFFER,         // custom operand formats.
    OPCODE_DCL_SAMPLER,
    OPCODE_DCL_INDEX_RANGE,
    OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY,
    OPCODE_DCL_GS_INPUT_PRIMITIVE,
    OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT,
    OPCODE_DCL_INPUT,
    OPCODE_DCL_INPUT_SGV,
    OPCODE_DCL_INPUT_SIV,
    OPCODE_DCL_INPUT_PS,
    OPCODE_DCL_INPUT_PS_SGV,
    OPCODE_DCL_INPUT_PS_SIV,
    OPCODE_DCL_OUTPUT,
    OPCODE_DCL_OUTPUT_SGV,
    OPCODE_DCL_OUTPUT_SIV,
    OPCODE_DCL_TEMPS,
    OPCODE_DCL_INDEXABLE_TEMP,
    OPCODE_DCL_GLOBAL_FLAGS,

    // -----------------------------------------------

    OPCODE_RESERVED_10,

    // ---------- DX 10.1 op codes---------------------

    OPCODE_LOD,
    OPCODE_GATHER4,
    OPCODE_SAMPLE_POS,
    OPCODE_SAMPLE_INFO,

    // -----------------------------------------------

    // This should be 10.1's version of NUM_OPCODES
    OPCODE_RESERVED_10_1,

    // ---------- DX 11 op codes---------------------
    OPCODE_HS_DECLS,                    // token marks beginning of HS sub-shader
    OPCODE_HS_CONTROL_POINT_PHASE,      // token marks beginning of HS sub-shader
    OPCODE_HS_FORK_PHASE,               // token marks beginning of HS sub-shader
    OPCODE_HS_JOIN_PHASE,               // token marks beginning of HS sub-shader

    OPCODE_EMIT_STREAM,
    OPCODE_CUT_STREAM,
    OPCODE_EMITTHENCUT_STREAM,
    OPCODE_INTERFACE_CALL,

    OPCODE_BUFINFO,
    OPCODE_DERIV_RTX_COARSE,
    OPCODE_DERIV_RTX_FINE,
    OPCODE_DERIV_RTY_COARSE,
    OPCODE_DERIV_RTY_FINE,
    OPCODE_GATHER4_C,
    OPCODE_GATHER4_PO,
    OPCODE_GATHER4_PO_C,
    OPCODE_RCP,
    OPCODE_F32TOF16,
    OPCODE_F16TOF32,
    OPCODE_UADDC,
    OPCODE_USUBB,
    OPCODE_COUNTBITS,
    OPCODE_FIRSTBIT_HI,
    OPCODE_FIRSTBIT_LO,
    OPCODE_FIRSTBIT_SHI,
    OPCODE_UBFE,
    OPCODE_IBFE,
    OPCODE_BFI,
    OPCODE_BFREV,
    OPCODE_SWAPC,

    OPCODE_DCL_STREAM,
    OPCODE_DCL_FUNCTION_BODY,
    OPCODE_DCL_FUNCTION_TABLE,
    OPCODE_DCL_INTERFACE,

    OPCODE_DCL_INPUT_CONTROL_POINT_COUNT,
    OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT,
    OPCODE_DCL_TESS_DOMAIN,
    OPCODE_DCL_TESS_PARTITIONING,
    OPCODE_DCL_TESS_OUTPUT_PRIMITIVE,
    OPCODE_DCL_HS_MAX_TESSFACTOR,
    OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT,
    OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT,

    OPCODE_DCL_THREAD_GROUP,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW,
    OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED,
    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW,
    OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED,
    OPCODE_DCL_RESOURCE_RAW,
    OPCODE_DCL_RESOURCE_STRUCTURED,
    OPCODE_LD_UAV_TYPED,
    OPCODE_STORE_UAV_TYPED,
    OPCODE_LD_RAW,
    OPCODE_STORE_RAW,
    OPCODE_LD_STRUCTURED,
    OPCODE_STORE_STRUCTURED,
    OPCODE_ATOMIC_AND,
    OPCODE_ATOMIC_OR,
    OPCODE_ATOMIC_XOR,
    OPCODE_ATOMIC_CMP_STORE,
    OPCODE_ATOMIC_IADD,
    OPCODE_ATOMIC_IMAX,
    OPCODE_ATOMIC_IMIN,
    OPCODE_ATOMIC_UMAX,
    OPCODE_ATOMIC_UMIN,
    OPCODE_IMM_ATOMIC_ALLOC,
    OPCODE_IMM_ATOMIC_CONSUME,
    OPCODE_IMM_ATOMIC_IADD,
    OPCODE_IMM_ATOMIC_AND,
    OPCODE_IMM_ATOMIC_OR,
    OPCODE_IMM_ATOMIC_XOR,
    OPCODE_IMM_ATOMIC_EXCH,
    OPCODE_IMM_ATOMIC_CMP_EXCH,
    OPCODE_IMM_ATOMIC_IMAX,
    OPCODE_IMM_ATOMIC_IMIN,
    OPCODE_IMM_ATOMIC_UMAX,
    OPCODE_IMM_ATOMIC_UMIN,
    OPCODE_SYNC,

    OPCODE_DADD,
    OPCODE_DMAX,
    OPCODE_DMIN,
    OPCODE_DMUL,
    OPCODE_DEQ,
    OPCODE_DGE,
    OPCODE_DLT,
    OPCODE_DNE,
    OPCODE_DMOV,
    OPCODE_DMOVC,
    OPCODE_DTOF,
    OPCODE_FTOD,

    OPCODE_EVAL_SNAPPED,
    OPCODE_EVAL_SAMPLE_INDEX,
    OPCODE_EVAL_CENTROID,

    OPCODE_DCL_GS_INSTANCE_COUNT,

    OPCODE_ABORT,
    OPCODE_DEBUG_BREAK,

    // -----------------------------------------------

    // This marks the end of D3D11.0 opcodes
    OPCODE_RESERVED_11,

    OPCODE_DDIV,
    OPCODE_DFMA,
    OPCODE_DRCP,

    OPCODE_MSAD,

    OPCODE_DTOI,
    OPCODE_DTOU,
    OPCODE_ITOD,
    OPCODE_UTOD,

    // -----------------------------------------------

    // This marks the end of D3D11.1 opcodes
    OPCODE_RESERVED_11_1,

    NUM_OPCODES,
    OPCODE_INVALID = NUM_OPCODES,
};

// Returns the opcode stored in bits [10:0] of an opcode token.
static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token)
{
    const uint32_t opcodeBits = ui32Token & 0x7ffu;
    return static_cast<OPCODE_TYPE>(opcodeBits);
}
|
||||
|
||||
// Number of index dimensions an operand carries.
typedef enum
{
    INDEX_0D = 0,
    INDEX_1D = 1,
    INDEX_2D = 2,
    INDEX_3D = 3,
} OPERAND_INDEX_DIMENSION;

// Returns the index dimension stored in bits [21:20] of an operand token.
static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 20) & 0x3u;
    return static_cast<OPERAND_INDEX_DIMENSION>(dimBits);
}
|
||||
|
||||
// Register file / kind of an operand. Negative values are the OPERAND_TYPE_SPECIAL_* entries;
// the non-negative values are the ones DecodeOperandType() can return.
typedef enum OPERAND_TYPE
{
    OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10,
    OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9,
    OPERAND_TYPE_SPECIAL_TEXCOORD = -8,
    OPERAND_TYPE_SPECIAL_POSITION = -7,
    OPERAND_TYPE_SPECIAL_FOG = -6,
    OPERAND_TYPE_SPECIAL_POINTSIZE = -5,
    OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4,
    OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3,
    OPERAND_TYPE_SPECIAL_ADDRESS = -2,
    OPERAND_TYPE_SPECIAL_IMMCONST = -1,
    OPERAND_TYPE_TEMP           = 0,  // Temporary Register File
    OPERAND_TYPE_INPUT          = 1,  // General Input Register File
    OPERAND_TYPE_OUTPUT         = 2,  // General Output Register File
    OPERAND_TYPE_INDEXABLE_TEMP = 3,  // Temporary Register File (indexable)
    OPERAND_TYPE_IMMEDIATE32    = 4,  // 32bit/component immediate value(s)
                                      // If for example, operand token bits
                                      // [01:00]==OPERAND_4_COMPONENT,
                                      // this means that the operand type:
                                      // OPERAND_TYPE_IMMEDIATE32
                                      // results in 4 additional 32bit
                                      // DWORDS present for the operand.
    OPERAND_TYPE_IMMEDIATE64    = 5,  // 64bit/comp.imm.val(s)HI:LO
    OPERAND_TYPE_SAMPLER        = 6,  // Reference to sampler state
    OPERAND_TYPE_RESOURCE       = 7,  // Reference to memory resource (e.g. texture)
    OPERAND_TYPE_CONSTANT_BUFFER = 8, // Reference to constant buffer
    OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, // Reference to immediate constant buffer
    OPERAND_TYPE_LABEL          = 10, // Label
    OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID
    OPERAND_TYPE_OUTPUT_DEPTH   = 12, // Output Depth
    OPERAND_TYPE_NULL           = 13, // Null register, used to discard results of operations
    // Below Are operands new in DX 10.1
    OPERAND_TYPE_RASTERIZER     = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources
    OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar)
    // Below Are operands new in DX 11
    OPERAND_TYPE_STREAM         = 16, // Reference to GS stream output resource
    OPERAND_TYPE_FUNCTION_BODY  = 17, // Reference to a function definition
    OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class
    OPERAND_TYPE_INTERFACE      = 19, // Reference to an interface
    OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function
    OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function
    OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is
    OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID
    OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID
    OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them)
    OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them)
    OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them)
    OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point
    OPERAND_TYPE_THIS_POINTER   = 29, // Reference to an interface this pointer
    OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u#
    OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g#
    OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID
    OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID
    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group
    OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input
    OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value.
    OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID
    OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth
    OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39, // Output Depth, forced to be less than or equal to current depth
    OPERAND_TYPE_CYCLE_COUNTER  = 40, // Cycle counter
} OPERAND_TYPE;

// Returns the operand type stored in bits [19:12] of an operand token.
static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token)
{
    const uint32_t typeBits = (ui32Token >> 12) & 0xffu;
    return static_cast<OPERAND_TYPE>(typeBits);
}
|
||||
|
||||
// Returns the special (system value) name stored in the lower 16 bits of the token.
static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token)
{
    const uint32_t nameBits = ui32Token & 0xffffu;
    return static_cast<SPECIAL_NAME>(nameBits);
}
|
||||
|
||||
// How one index of an operand is encoded in the token stream.
typedef enum OPERAND_INDEX_REPRESENTATION
{
    OPERAND_INDEX_IMMEDIATE32 = 0,               // Extra DWORD
    OPERAND_INDEX_IMMEDIATE64 = 1,               // 2 Extra DWORDs
                                                 // (HI32:LO32)
    OPERAND_INDEX_RELATIVE = 2,                  // Extra operand
    OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by
                                                 // extra operand
    OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS
                                                 // (HI32:LO32) followed
                                                 // by extra operand
} OPERAND_INDEX_REPRESENTATION;

// Returns the index representation for index dimension ui32Dimension of an operand token.
// Only the low two bits of ui32Dimension are used; the field for dimension N starts at bit 22 + 3*N.
//
// The arithmetic is done on unsigned values: the previous signed `0x3 << shift` overflowed int
// (undefined behaviour) when ui32Dimension was 3, i.e. shift == 31. Results are unchanged for every
// input that was well-defined before.
//
// NOTE(review): the mask is two bits wide although the fields are spaced three bits apart, so
// OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE (4) can never be returned - confirm this is intended.
static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token)
{
    const uint32_t shift = 22u + 3u * (ui32Dimension & 3u);
    return (OPERAND_INDEX_REPRESENTATION)((ui32Token >> shift) & 0x3u);
}
|
||||
|
||||
// Encoded component-count selector of an operand (note that the value 2 means four components).
typedef enum OPERAND_NUM_COMPONENTS
{
    OPERAND_0_COMPONENT = 0,
    OPERAND_1_COMPONENT = 1,
    OPERAND_4_COMPONENT = 2,
    OPERAND_N_COMPONENT = 3 // unused for now
} OPERAND_NUM_COMPONENTS;

// Returns the component-count selector stored in bits [1:0] of an operand token.
static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token)
{
    const uint32_t countBits = ui32Token & 0x3u;
    return static_cast<OPERAND_NUM_COMPONENTS>(countBits);
}
|
||||
|
||||
// How the components of a 4-component operand are selected.
typedef enum OPERAND_4_COMPONENT_SELECTION_MODE
{
    OPERAND_4_COMPONENT_MASK_MODE     = 0, // mask 4 components
    OPERAND_4_COMPONENT_SWIZZLE_MODE  = 1, // swizzle 4 components
    OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components
} OPERAND_4_COMPONENT_SELECTION_MODE;

// Returns the component selection mode stored in bits [3:2] of an operand token.
static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 2) & 0x3u;
    return static_cast<OPERAND_4_COMPONENT_SELECTION_MODE>(modeBits);
}
|
||||
|
||||
// Write-mask bits for the individual components of a 4-component operand.
#define OPERAND_4_COMPONENT_MASK_X      0x00000001
#define OPERAND_4_COMPONENT_MASK_Y      0x00000002
#define OPERAND_4_COMPONENT_MASK_Z      0x00000004
#define OPERAND_4_COMPONENT_MASK_W      0x00000008
// Colour-channel aliases of the masks above (r/g/b/a == x/y/z/w).
#define OPERAND_4_COMPONENT_MASK_R      OPERAND_4_COMPONENT_MASK_X
#define OPERAND_4_COMPONENT_MASK_G      OPERAND_4_COMPONENT_MASK_Y
#define OPERAND_4_COMPONENT_MASK_B      OPERAND_4_COMPONENT_MASK_Z
#define OPERAND_4_COMPONENT_MASK_A      OPERAND_4_COMPONENT_MASK_W
// All four component bits set.
#define OPERAND_4_COMPONENT_MASK_ALL    0x0000000f
|
||||
|
||||
// Returns the 4-bit component mask stored in bits [7:4] of an operand token.
static uint32_t DecodeOperand4CompMask(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xfu;
}
|
||||
|
||||
// Returns the 8-bit swizzle (four 2-bit component selectors) stored in bits [11:4] of an operand token.
static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0xffu;
}
|
||||
|
||||
// Returns the index (0..3) of the single selected component stored in bits [5:4] of an operand token.
static uint32_t DecodeOperand4CompSel1(uint32_t ui32Token)
{
    const uint32_t shifted = ui32Token >> 4;
    return shifted & 0x3u;
}
|
||||
|
||||
// Component indices as used by the 2-bit swizzle selectors.
#define OPERAND_4_COMPONENT_X 0
#define OPERAND_4_COMPONENT_Y 1
#define OPERAND_4_COMPONENT_Z 2
#define OPERAND_4_COMPONENT_W 3

// Identity swizzle (.xyzw): selector i picks component i. The value is not pre-shifted into token position.
static const uint32_t NO_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6)) /*<<4*/);

// Replicating swizzles: all four selectors pick the same component.
static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6)));
static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | (OPERAND_4_COMPONENT_Y << 6)));
static const uint32_t ZZZZ_SWIZZLE = (((OPERAND_4_COMPONENT_Z) | (OPERAND_4_COMPONENT_Z << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_Z << 6)));
static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMPONENT_W << 2) | (OPERAND_4_COMPONENT_W << 4) | (OPERAND_4_COMPONENT_W << 6)));
|
||||
|
||||
// Returns the 2-bit source-component selector for destination component `comp` (only its low two
// bits are used). Selector N occupies bits [2N+5:2N+4] of the operand token.
static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp)
{
    const uint32_t shift = 4 + 2 * (comp & 3);
    return (ui32Token >> shift) & 3;
}
|
||||
|
||||
// Dimensionality / kind of a shader resource.
typedef enum RESOURCE_DIMENSION
{
    RESOURCE_DIMENSION_UNKNOWN           = 0,
    RESOURCE_DIMENSION_BUFFER            = 1,
    RESOURCE_DIMENSION_TEXTURE1D         = 2,
    RESOURCE_DIMENSION_TEXTURE2D         = 3,
    RESOURCE_DIMENSION_TEXTURE2DMS       = 4,
    RESOURCE_DIMENSION_TEXTURE3D         = 5,
    RESOURCE_DIMENSION_TEXTURECUBE       = 6,
    RESOURCE_DIMENSION_TEXTURE1DARRAY    = 7,
    RESOURCE_DIMENSION_TEXTURE2DARRAY    = 8,
    RESOURCE_DIMENSION_TEXTURE2DMSARRAY  = 9,
    RESOURCE_DIMENSION_TEXTURECUBEARRAY  = 10,
    RESOURCE_DIMENSION_RAW_BUFFER        = 11,
    RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12,
} RESOURCE_DIMENSION;

// Returns the resource dimension stored in bits [15:11] of the token.
static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 11) & 0x1fu;
    return static_cast<RESOURCE_DIMENSION>(dimBits);
}

// Returns the resource dimension stored in bits [10:6] of an extended opcode token.
static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token)
{
    const uint32_t dimBits = (ui32Token >> 6) & 0x1fu;
    return static_cast<RESOURCE_DIMENSION>(dimBits);
}
|
||||
|
||||
// Which condition a conditional instruction tests its operand for.
typedef enum INSTRUCTION_TEST_BOOLEAN
{
    INSTRUCTION_TEST_ZERO    = 0,
    INSTRUCTION_TEST_NONZERO = 1
} INSTRUCTION_TEST_BOOLEAN;

// Returns the test-boolean flag stored in bit 18 of an opcode token.
static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token)
{
    const uint32_t flag = (ui32Token >> 18) & 0x1u;
    return static_cast<INSTRUCTION_TEST_BOOLEAN>(flag);
}
|
||||
|
||||
// Returns 1 when bit 31 of the operand token (the "extended" flag) is set, otherwise 0.
static uint32_t DecodeIsOperandExtended(uint32_t ui32Token)
{
    return ui32Token >> 31;
}
|
||||
|
||||
// Kinds of extended operand token.
typedef enum EXTENDED_OPERAND_TYPE
{
    EXTENDED_OPERAND_EMPTY    = 0,
    EXTENDED_OPERAND_MODIFIER = 1,
} EXTENDED_OPERAND_TYPE;

// Returns the extended operand type stored in bits [5:0] of an extended operand token.
static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token)
{
    const uint32_t typeBits = ui32Token & 0x3fu;
    return static_cast<EXTENDED_OPERAND_TYPE>(typeBits);
}
|
||||
|
||||
// Source operand modifier: none, negate, absolute value, or negated absolute value.
typedef enum OPERAND_MODIFIER
{
    OPERAND_MODIFIER_NONE   = 0,
    OPERAND_MODIFIER_NEG    = 1,
    OPERAND_MODIFIER_ABS    = 2,
    OPERAND_MODIFIER_ABSNEG = 3,
} OPERAND_MODIFIER;

// Returns the operand modifier stored in bits [13:6] of an extended operand token.
static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token)
{
    const uint32_t modifierBits = (ui32Token >> 6) & 0xffu;
    return static_cast<OPERAND_MODIFIER>(modifierBits);
}
|
||||
|
||||
// Shader global flags. The bit positions line up with the field DecodeGlobalFlags() extracts
// (bits 11 and up), so the decoded value can be tested against these constants directly.
static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED               = 1u << 11;
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 1u << 12;
static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL         = 1u << 13;
static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = 1u << 14;
static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION                 = 1u << 15;
static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION          = 1u << 16;
static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS          = 1u << 17;
static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS          = 1u << 18;
|
||||
|
||||
// Returns the global-flags field (bits [23:11]) of the token, left in place (not shifted down).
static uint32_t DecodeGlobalFlags(uint32_t ui32Token)
{
    const uint32_t kGlobalFlagsMask = 0x00fff800u;
    return ui32Token & kGlobalFlagsMask;
}
|
||||
|
||||
// Returns the interpolation mode stored in bits [14:11] of the token.
static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token)
{
    const uint32_t modeBits = (ui32Token >> 11) & 0xfu;
    return static_cast<INTERPOLATION_MODE>(modeBits);
}
|
||||
|
||||
// Geometry shader output primitive topology.
typedef enum PRIMITIVE_TOPOLOGY
{
    PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
    PRIMITIVE_TOPOLOGY_POINTLIST = 1,
    PRIMITIVE_TOPOLOGY_LINELIST = 2,
    PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5,
    // 6 is reserved for legacy triangle fans
    // Adjacency values are the non-adjacency value with bit 0x8 set (0x8 | non-adjacency):
    PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
    PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
    PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
    PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13,
} PRIMITIVE_TOPOLOGY;

// Returns the output primitive topology stored in bits [16:11] of the token.
static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token)
{
    const uint32_t topologyBits = (ui32Token >> 11) & 0x3fu;
    return static_cast<PRIMITIVE_TOPOLOGY>(topologyBits);
}
|
||||
|
||||
// Geometry shader input primitive type, including the N-control-point patch types.
typedef enum PRIMITIVE
{
    PRIMITIVE_UNDEFINED = 0,
    PRIMITIVE_POINT = 1,
    PRIMITIVE_LINE = 2,
    PRIMITIVE_TRIANGLE = 3,
    // Adjacency values are the non-adjacency value with bit 0x4 set (0x4 | non-adjacency):
    PRIMITIVE_LINE_ADJ = 6,
    PRIMITIVE_TRIANGLE_ADJ = 7,
    // Patches: PRIMITIVE_<N>_CONTROL_POINT_PATCH == N + 7
    PRIMITIVE_1_CONTROL_POINT_PATCH = 8,
    PRIMITIVE_2_CONTROL_POINT_PATCH = 9,
    PRIMITIVE_3_CONTROL_POINT_PATCH = 10,
    PRIMITIVE_4_CONTROL_POINT_PATCH = 11,
    PRIMITIVE_5_CONTROL_POINT_PATCH = 12,
    PRIMITIVE_6_CONTROL_POINT_PATCH = 13,
    PRIMITIVE_7_CONTROL_POINT_PATCH = 14,
    PRIMITIVE_8_CONTROL_POINT_PATCH = 15,
    PRIMITIVE_9_CONTROL_POINT_PATCH = 16,
    PRIMITIVE_10_CONTROL_POINT_PATCH = 17,
    PRIMITIVE_11_CONTROL_POINT_PATCH = 18,
    PRIMITIVE_12_CONTROL_POINT_PATCH = 19,
    PRIMITIVE_13_CONTROL_POINT_PATCH = 20,
    PRIMITIVE_14_CONTROL_POINT_PATCH = 21,
    PRIMITIVE_15_CONTROL_POINT_PATCH = 22,
    PRIMITIVE_16_CONTROL_POINT_PATCH = 23,
    PRIMITIVE_17_CONTROL_POINT_PATCH = 24,
    PRIMITIVE_18_CONTROL_POINT_PATCH = 25,
    PRIMITIVE_19_CONTROL_POINT_PATCH = 26,
    PRIMITIVE_20_CONTROL_POINT_PATCH = 27,
    PRIMITIVE_21_CONTROL_POINT_PATCH = 28,
    PRIMITIVE_22_CONTROL_POINT_PATCH = 29,
    PRIMITIVE_23_CONTROL_POINT_PATCH = 30,
    PRIMITIVE_24_CONTROL_POINT_PATCH = 31,
    PRIMITIVE_25_CONTROL_POINT_PATCH = 32,
    PRIMITIVE_26_CONTROL_POINT_PATCH = 33,
    PRIMITIVE_27_CONTROL_POINT_PATCH = 34,
    PRIMITIVE_28_CONTROL_POINT_PATCH = 35,
    PRIMITIVE_29_CONTROL_POINT_PATCH = 36,
    PRIMITIVE_30_CONTROL_POINT_PATCH = 37,
    PRIMITIVE_31_CONTROL_POINT_PATCH = 38,
    PRIMITIVE_32_CONTROL_POINT_PATCH = 39,
} PRIMITIVE;

// Returns the input primitive type stored in bits [16:11] of the token.
static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token)
{
    const uint32_t primitiveBits = (ui32Token >> 11) & 0x3fu;
    return static_cast<PRIMITIVE>(primitiveBits);
}
|
||||
|
||||
// Returns the tessellator partitioning mode stored in bits [13:11] of the token.
static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token)
{
    const uint32_t partitioningBits = (ui32Token >> 11) & 0x7u;
    return static_cast<TESSELLATOR_PARTITIONING>(partitioningBits);
}
|
||||
|
||||
// Returns the tessellator domain stored in bits [12:11] of the token.
static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token)
{
    const uint32_t domainBits = (ui32Token >> 11) & 0x3u;
    return static_cast<TESSELLATOR_DOMAIN>(domainBits);
}
|
||||
|
||||
// Returns the tessellator output primitive stored in bits [13:11] of the token.
static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token)
{
    const uint32_t primitiveBits = (ui32Token >> 11) & 0x7u;
    return static_cast<TESSELLATOR_OUTPUT_PRIMITIVE>(primitiveBits);
}
|
||||
|
||||
// Flags of the sync instruction. The bit positions line up with the field DecodeSyncFlags()
// extracts (bits [14:11]), so the decoded value can be tested against these constants directly.
static const uint32_t SYNC_THREADS_IN_GROUP                    = 1u << 11;
static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY          = 1u << 12;
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP  = 1u << 13;
static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 1u << 14;
|
||||
|
||||
// Returns the sync-flags field (bits [14:11]) of the token, left in place (not shifted down).
static uint32_t DecodeSyncFlags(uint32_t ui32Token)
{
    const uint32_t kSyncFlagsMask = 0x00007800u;
    return ui32Token & kSyncFlagsMask;
}
|
||||
|
||||
// The number of types that implement this interface (lower 16 bits of the token)
static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token)
{
    const uint32_t kLow16 = 0xffffu;
    return ui32Token & kLow16;
}
|
||||
|
||||
// The number of interfaces that are defined in this array (upper 16 bits of the token).
static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token)
{
    return ui32Token >> 16;
}
|
||||
|
||||
typedef enum CUSTOMDATA_CLASS
|
||||
{
|
||||
CUSTOMDATA_COMMENT = 0,
|
||||
CUSTOMDATA_DEBUGINFO,
|
||||
CUSTOMDATA_OPAQUE,
|
||||
CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER,
|
||||
CUSTOMDATA_SHADER_MESSAGE,
|
||||
} CUSTOMDATA_CLASS;
|
||||
|
||||
// Returns bits 11..31 of the token (everything above the low 11 bits), cast to CUSTOMDATA_CLASS.
static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token)
{
    const uint32_t ui32ClassBits = (ui32Token >> 11) & 0x001fffff;
    return (CUSTOMDATA_CLASS)ui32ClassBits;
}
|
||||
|
||||
// Returns 1 when bit 13 (0x00002000) of the token is set, otherwise 0.
static uint32_t DecodeInstructionSaturate(uint32_t ui32Token)
{
    if ((ui32Token & 0x00002000) != 0)
        return 1;
    return 0;
}
|
||||
|
||||
// "precise" keyword: returns the 4-bit mask held in bits 19..22 of the token.
static uint32_t DecodeInstructionPreciseMask(uint32_t ui32Token)
{
    const uint32_t ui32PreciseMask = (ui32Token >> 19) & 0xf;
    return ui32PreciseMask;
}
|
||||
|
||||
// Minimum-precision codes for an operand. The value 3 is not assigned.
typedef enum OPERAND_MIN_PRECISION
{
    // Default precision for the shader model
    OPERAND_MIN_PRECISION_DEFAULT   = 0,
    // Min 16 bit/component float
    OPERAND_MIN_PRECISION_FLOAT_16  = 1,
    // Min 10(2.8)bit/comp. float
    OPERAND_MIN_PRECISION_FLOAT_2_8 = 2,
    // Min 16 bit/comp. signed integer
    OPERAND_MIN_PRECISION_SINT_16   = 4,
    // Min 16 bit/comp. unsigned integer
    OPERAND_MIN_PRECISION_UINT_16   = 5,
} OPERAND_MIN_PRECISION;
|
||||
|
||||
// Returns the 3-bit field held in bits 14..16 of the token.
static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token)
{
    const uint32_t ui32MinPrecision = (ui32Token >> 14) & 0x7;
    return ui32MinPrecision;
}
|
||||
|
||||
// Returns the 6-bit field held in bits 11..16 of the token.
static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token)
{
    const uint32_t ui32ControlPointCount = (ui32Token >> 11) & 0x3f;
    return ui32ControlPointCount;
}
|
||||
|
||||
// Selects which coordinate's immediate address offset to decode (U = 0, V = 1, W = 2).
typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD
{
    IMMEDIATE_ADDRESS_OFFSET_U = 0,
    IMMEDIATE_ADDRESS_OFFSET_V = 1,
    IMMEDIATE_ADDRESS_OFFSET_W = 2,
} IMMEDIATE_ADDRESS_OFFSET_COORD;
|
||||
|
||||
|
||||
// Bit position of the 4-bit offset field for a coordinate: 9, 13, 17 (and 21
// for the unused index 3). Only the low two bits of Coord are considered.
#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9 + 4 * ((Coord) & 3))
// Mask selecting that 4-bit field in place (not shifted down).
#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f << IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord))
|
||||
|
||||
// Isolates the 4-bit offset field for eCoord with IMMEDIATE_ADDRESS_OFFSET_MASK
// and shifts it down to bit 0. The raw 4-bit value is returned as-is; no sign
// extension is applied here.
static uint32_t DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_COORD eCoord, uint32_t ui32Token)
{
    const uint32_t ui32FieldInPlace = ui32Token & IMMEDIATE_ADDRESS_OFFSET_MASK(eCoord);
    return ui32FieldInPlace >> IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord);
}
|
||||
|
||||
// UAV access scope flags
static const uint32_t GLOBALLY_COHERENT_ACCESS = 1u << 16;

// Keeps only bit 16 of the token, so the result is either 0 or GLOBALLY_COHERENT_ACCESS.
static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token)
{
    const uint32_t ui32CoherencyMask = 0x00010000;
    return ui32Token & ui32CoherencyMask;
}
|
||||
|
||||
// Return-type selector for the resinfo instruction.
typedef enum RESINFO_RETURN_TYPE
{
    RESINFO_INSTRUCTION_RETURN_FLOAT    = 0,
    RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1,
    RESINFO_INSTRUCTION_RETURN_UINT     = 2
} RESINFO_RETURN_TYPE;
|
||||
|
||||
// Returns the 2-bit field held in bits 11..12 of the token, cast to RESINFO_RETURN_TYPE.
static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token)
{
    const uint32_t ui32ReturnTypeBits = (ui32Token >> 11) & 0x3;
    return (RESINFO_RETURN_TYPE)ui32ReturnTypeBits;
}
|
||||
|
||||
// Sampler mode codes as returned by DecodeSamplerMode().
typedef enum SB_SAMPLER_MODE
{
    D3D10_SB_SAMPLER_MODE_DEFAULT    = 0,
    D3D10_SB_SAMPLER_MODE_COMPARISON = 1,
    D3D10_SB_SAMPLER_MODE_MONO       = 2,
} SB_SAMPLER_MODE;
|
||||
|
||||
// Returns the 2-bit field held in bits 11..12 of the token, cast to SB_SAMPLER_MODE.
static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token)
{
    const uint32_t ui32SamplerModeBits = (ui32Token >> 11) & 0x3;
    return (SB_SAMPLER_MODE)ui32SamplerModeBits;
}
|
||||
|
||||
#endif
|
620
third_party/HLSLcc/src/reflect.cpp
vendored
Normal file
620
third_party/HLSLcc/src/reflect.cpp
vendored
Normal file
@ -0,0 +1,620 @@
|
||||
#include "internal_includes/reflect.h"
|
||||
#include "internal_includes/debug.h"
|
||||
#include "internal_includes/decode.h"
|
||||
#include "bstrlib.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// Rewrites a leading '$' to '_' so the name can be used as a GLSL identifier
// ('$' is not a valid character there). Per MSDN
// (http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx),
// uniform function parameters appear in the constant table with a '$' prefix to
// avoid collisions with global variables of the same name.
// The names "$Element", "$Globals" and "$ThisPointer" are left untouched, as
// are names that do not start with '$'.
static void FormatVariableName(std::string & Name)
{
    static const char* const kKeptNames[] = { "$Element", "$Globals", "$ThisPointer" };

    if (Name[0] != '$')
        return;

    for (size_t k = 0; k < sizeof(kKeptNames) / sizeof(kKeptNames[0]); ++k)
    {
        if (strcmp(Name.c_str(), kKeptNames[k]) == 0)
            return;
    }

    Name[0] = '_';
}
|
||||
|
||||
// Interprets the memory at `tokens` as a NUL-terminated character string and
// returns a copy of it.
static std::string ReadStringFromTokenStream(const uint32_t* tokens)
{
    return std::string(reinterpret_cast<const char*>(tokens));
}
|
||||
|
||||
// Returns the zero-based index of the lowest set bit in `mask`
// (e.g. 0x1 -> 0, 0x2 -> 1, 0xC -> 2).
// An empty mask has no set bit; 0 is returned for it rather than scanning,
// because the bit scan below would otherwise never terminate.
static int MaskToRebaseOffset(const uint32_t mask)
{
    if (mask == 0)
        return 0;

    int res = 0;
    for (uint32_t m = mask; (m & 1) == 0; m >>= 1)
        ++res;
    return res;
}
|
||||
|
||||
static void ReadInputSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int extended)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/* const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psInputSignatures.clear();
|
||||
psShaderInfo->psInputSignatures.resize(ui32ElementCount);
|
||||
|
||||
for (i = 0; i < ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psInputSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if (extended)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are read
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if (extended)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
static void ReadOutputSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int minPrec,
|
||||
const int streams)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psOutputSignatures.clear();
|
||||
psShaderInfo->psOutputSignatures.resize(ui32ElementCount);
|
||||
|
||||
for (i = 0; i < ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psOutputSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if (streams)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
// Massage some special inputs/outputs to match the types of GLSL counterparts
|
||||
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
|
||||
{
|
||||
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
|
||||
}
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are NEVER written.
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if (minPrec)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo,
|
||||
const int minPrec,
|
||||
const int streams)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t* pui32FirstSignatureToken = pui32Tokens;
|
||||
const uint32_t ui32ElementCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32Key = * */ pui32Tokens++;
|
||||
|
||||
psShaderInfo->psPatchConstantSignatures.clear();
|
||||
psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount);
|
||||
|
||||
for (i = 0; i < ui32ElementCount; ++i)
|
||||
{
|
||||
uint32_t ui32ComponentMasks;
|
||||
ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i];
|
||||
uint32_t ui32SemanticNameOffset;
|
||||
|
||||
psCurrentSignature->ui32Stream = 0;
|
||||
psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT;
|
||||
|
||||
if (streams)
|
||||
psCurrentSignature->ui32Stream = *pui32Tokens++;
|
||||
|
||||
ui32SemanticNameOffset = *pui32Tokens++;
|
||||
psCurrentSignature->ui32SemanticIndex = *pui32Tokens++;
|
||||
psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++;
|
||||
psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++;
|
||||
psCurrentSignature->ui32Register = *pui32Tokens++;
|
||||
|
||||
// Massage some special inputs/outputs to match the types of GLSL counterparts
|
||||
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX)
|
||||
{
|
||||
psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32;
|
||||
}
|
||||
|
||||
ui32ComponentMasks = *pui32Tokens++;
|
||||
psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F;
|
||||
//Shows which components are NEVER written.
|
||||
psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8;
|
||||
psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask);
|
||||
|
||||
if (minPrec)
|
||||
psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++;
|
||||
|
||||
psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset));
|
||||
}
|
||||
}
|
||||
|
||||
// Removes `suffix` from the end of `name` when `name` ends with it.
// Returns true if the suffix was present (and has been removed).
static bool StripBindingNameSuffix(std::string& name, const char* suffix)
{
    const size_t suffixLen = strlen(suffix);
    if (name.length() < suffixLen)
        return false;
    if (name.compare(name.length() - suffixLen, suffixLen, suffix) != 0)
        return false;
    name.resize(name.length() - suffixLen);
    return true;
}

// Reads one resource-binding record starting at pui32Tokens into *psBinding and
// returns the token pointer just past the record.
//   psShaderInfo            - supplies the shader version; for 5.1 and for any
//                             major version above 5 the record carries two
//                             extra tokens (space and range ID).
//   pui32FirstResourceToken - chunk start; the name offset is in bytes
//                             relative to it.
//   decodeFlags             - when HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME
//                             is set, a trailing "_highp" / "_mediump" /
//                             "_lowp" is stripped from the name and recorded in
//                             ePrecision.
static const uint32_t* ReadResourceBinding(ShaderInfo* psShaderInfo, const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags)
{
    uint32_t ui32NameOffset = *pui32Tokens++;

    psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken + ui32NameOffset));
    FormatVariableName(psBinding->name);

    psBinding->eType = (ResourceType) * pui32Tokens++;
    psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++;
    psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++;
    psBinding->ui32NumSamples = *pui32Tokens++; // fxc generates 2^32 - 1 for non MS images
    psBinding->ui32BindPoint = *pui32Tokens++;
    psBinding->ui32BindCount = *pui32Tokens++;
    psBinding->ui32Flags = *pui32Tokens++;
    if (((psShaderInfo->ui32MajorVersion >= 5) && (psShaderInfo->ui32MinorVersion >= 1)) ||
        (psShaderInfo->ui32MajorVersion > 5))
    {
        psBinding->ui32Space = *pui32Tokens++;
        psBinding->ui32RangeID = *pui32Tokens++;
    }

    psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN;

    if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME)
    {
        // The suffix test must check the name length first. Comparing
        // rfind() against length() - N wraps around for names shorter than
        // the suffix and can match npos, which then makes resize() throw.
        if (StripBindingNameSuffix(psBinding->name, "_highp"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP;
        else if (StripBindingNameSuffix(psBinding->name, "_mediump"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP;
        else if (StripBindingNameSuffix(psBinding->name, "_lowp"))
            psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP;
    }

    return pui32Tokens;
}
|
||||
|
||||
//Read D3D11_SHADER_TYPE_DESC
|
||||
static void ReadShaderVariableType(const uint32_t ui32MajorVersion,
|
||||
const uint32_t* pui32FirstConstBufToken,
|
||||
const uint32_t* pui32tokens, ShaderVarType* varType)
|
||||
{
|
||||
const uint16_t* pui16Tokens = (const uint16_t*)pui32tokens;
|
||||
uint16_t ui32MemberCount;
|
||||
uint32_t ui32MemberOffset;
|
||||
const uint32_t* pui32MemberTokens;
|
||||
uint32_t i;
|
||||
|
||||
varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0];
|
||||
varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1];
|
||||
varType->Rows = pui16Tokens[2];
|
||||
varType->Columns = pui16Tokens[3];
|
||||
varType->Elements = pui16Tokens[4];
|
||||
|
||||
varType->MemberCount = ui32MemberCount = pui16Tokens[5];
|
||||
varType->Members.clear();
|
||||
|
||||
if (varType->ParentCount)
|
||||
{
|
||||
// Add empty brackets for array parents. Indices are filled in later in the printing codes.
|
||||
if (varType->Parent->Elements > 1)
|
||||
varType->fullName = varType->Parent->fullName + "[]." + varType->name;
|
||||
else
|
||||
varType->fullName = varType->Parent->fullName + "." + varType->name;
|
||||
}
|
||||
|
||||
if (ui32MemberCount)
|
||||
{
|
||||
varType->Members.resize(ui32MemberCount);
|
||||
|
||||
ui32MemberOffset = pui32tokens[3];
|
||||
|
||||
pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberOffset);
|
||||
|
||||
for (i = 0; i < ui32MemberCount; ++i)
|
||||
{
|
||||
uint32_t ui32NameOffset = *pui32MemberTokens++;
|
||||
uint32_t ui32MemberTypeOffset = *pui32MemberTokens++;
|
||||
|
||||
varType->Members[i].Parent = varType;
|
||||
varType->Members[i].ParentCount = varType->ParentCount + 1;
|
||||
|
||||
varType->Members[i].Offset = *pui32MemberTokens++;
|
||||
|
||||
varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset));
|
||||
|
||||
ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken,
|
||||
(const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberTypeOffset), &varType->Members[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads one constant-buffer record starting at pui32Tokens into *psBuffer and
// returns the token pointer just past the record (name offset, variable count,
// variable offset, total size, flags, buffer type).
// All offsets found in the record are in bytes relative to
// pui32FirstConstBufToken. For each variable the name, start offset, size,
// type description and (when a default-value offset is present) the default
// values are loaded; for major version >= 5 four extra per-variable tokens are
// skipped.
static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo,
    const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer)
{
    const char* const pcChunkBase = (const char*)pui32FirstConstBufToken;

    const uint32_t ui32BufferNameOffset = *pui32Tokens++;
    const uint32_t ui32VarCount = *pui32Tokens++;
    const uint32_t ui32VarOffset = *pui32Tokens++;
    const uint32_t* pui32VarToken = (const uint32_t*)(pcChunkBase + ui32VarOffset);

    psBuffer->name = ReadStringFromTokenStream((const uint32_t*)(pcChunkBase + ui32BufferNameOffset));
    FormatVariableName(psBuffer->name);

    psBuffer->asVars.clear();
    psBuffer->asVars.resize(ui32VarCount);

    for (uint32_t varIdx = 0; varIdx < ui32VarCount; ++varIdx)
    {
        //D3D11_SHADER_VARIABLE_DESC
        ShaderVar& sVar = psBuffer->asVars[varIdx];

        const uint32_t ui32VarNameOffset = *pui32VarToken++;
        sVar.name = ReadStringFromTokenStream((const uint32_t*)(pcChunkBase + ui32VarNameOffset));
        FormatVariableName(sVar.name);

        sVar.ui32StartOffset = *pui32VarToken++;
        sVar.ui32Size = *pui32VarToken++;

        //skip ui32Flags
        ++pui32VarToken;

        const uint32_t ui32TypeOffset = *pui32VarToken++;

        // Seed the root type; ReadShaderVariableType fills in the rest.
        sVar.sType.name = sVar.name;
        sVar.sType.fullName = sVar.name;
        sVar.sType.Parent = 0;
        sVar.sType.ParentCount = 0;
        sVar.sType.Offset = 0;
        sVar.sType.m_IsUsed = false;

        ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken,
            (const uint32_t*)(pcChunkBase + ui32TypeOffset), &sVar.sType);

        const uint32_t ui32DefaultValueOffset = *pui32VarToken++;

        if (psShaderInfo->ui32MajorVersion >= 5)
        {
            // StartTexture, TextureSize, StartSampler, SamplerSize are not used.
            pui32VarToken += 4;
        }

        sVar.haveDefaultValue = 0;

        if (ui32DefaultValueOffset)
        {
            const uint32_t ui32NumDefaultValues = sVar.ui32Size / 4;
            const uint32_t* pui32DefaultValToken = (const uint32_t*)(pcChunkBase + ui32DefaultValueOffset);

            //Always a sequence of 4-bytes at the moment.
            //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes.
            ASSERT(sVar.ui32Size % 4 == 0);

            sVar.haveDefaultValue = 1;

            sVar.pui32DefaultValues.clear();
            sVar.pui32DefaultValues.resize(sVar.ui32Size / 4);

            for (uint32_t valIdx = 0; valIdx < ui32NumDefaultValues; ++valIdx)
                sVar.pui32DefaultValues[valIdx] = pui32DefaultValToken[valIdx];
        }
    }

    psBuffer->ui32TotalSizeInBytes = *pui32Tokens++;

    //skip ui32Flags and ui32BufferType
    pui32Tokens += 2;

    return pui32Tokens;
}
|
||||
|
||||
static void ReadResources(const uint32_t* pui32Tokens,//in
|
||||
ShaderInfo* psShaderInfo, //out
|
||||
uint32_t decodeFlags)
|
||||
{
|
||||
ResourceBinding* psResBindings;
|
||||
ConstantBuffer* psConstantBuffers;
|
||||
const uint32_t* pui32ConstantBuffers;
|
||||
const uint32_t* pui32ResourceBindings;
|
||||
const uint32_t* pui32FirstToken = pui32Tokens;
|
||||
uint32_t i;
|
||||
|
||||
const uint32_t ui32NumConstantBuffers = *pui32Tokens++;
|
||||
const uint32_t ui32ConstantBufferOffset = *pui32Tokens++;
|
||||
|
||||
uint32_t ui32NumResourceBindings = *pui32Tokens++;
|
||||
uint32_t ui32ResourceBindingOffset = *pui32Tokens++;
|
||||
/*uint32_t ui32ShaderModel = * */ pui32Tokens++;
|
||||
/*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx
|
||||
|
||||
//Resources
|
||||
pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset);
|
||||
|
||||
psShaderInfo->psResourceBindings.clear();
|
||||
psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings);
|
||||
psResBindings = ui32NumResourceBindings == 0 ? NULL : &psShaderInfo->psResourceBindings[0];
|
||||
|
||||
for (i = 0; i < ui32NumResourceBindings; ++i)
|
||||
{
|
||||
pui32ResourceBindings = ReadResourceBinding(psShaderInfo, pui32FirstToken, pui32ResourceBindings, psResBindings + i, decodeFlags);
|
||||
ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS);
|
||||
}
|
||||
|
||||
//Constant buffers
|
||||
pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset);
|
||||
|
||||
psShaderInfo->psConstantBuffers.clear();
|
||||
psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers);
|
||||
psConstantBuffers = ui32NumConstantBuffers == 0 ? NULL : &psShaderInfo->psConstantBuffers[0];
|
||||
|
||||
for (i = 0; i < ui32NumConstantBuffers; ++i)
|
||||
{
|
||||
pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i);
|
||||
}
|
||||
|
||||
//Map resource bindings to constant buffers
|
||||
if (psShaderInfo->psConstantBuffers.size())
|
||||
{
|
||||
/* HLSL allows the following:
|
||||
cbuffer A
|
||||
{...}
|
||||
cbuffer A
|
||||
{...}
|
||||
And both will be present in the assembly if used
|
||||
|
||||
So we need to track which ones we matched already and throw an error if two buffers have the same name
|
||||
*/
|
||||
std::vector<uint32_t> alreadyBound(ui32NumConstantBuffers, 0);
|
||||
for (i = 0; i < ui32NumResourceBindings; ++i)
|
||||
{
|
||||
ResourceGroup eRGroup;
|
||||
uint32_t cbufIndex = 0;
|
||||
|
||||
eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType);
|
||||
|
||||
//Find the constant buffer whose name matches the resource at the given resource binding point
|
||||
for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++)
|
||||
{
|
||||
if (psConstantBuffers[cbufIndex].name == psResBindings[i].name && alreadyBound[cbufIndex] == 0)
|
||||
{
|
||||
psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex;
|
||||
alreadyBound[cbufIndex] = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType)
|
||||
{
|
||||
const uint32_t* pui32Tokens = (const uint32_t*)pui16Tokens;
|
||||
uint32_t ui32NameOffset = *pui32Tokens;
|
||||
pui16Tokens += 2;
|
||||
|
||||
psClassType->ui16ID = *pui16Tokens++;
|
||||
psClassType->ui16ConstBufStride = *pui16Tokens++;
|
||||
psClassType->ui16Texture = *pui16Tokens++;
|
||||
psClassType->ui16Sampler = *pui16Tokens++;
|
||||
|
||||
psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
|
||||
|
||||
return pui16Tokens;
|
||||
}
|
||||
|
||||
static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance)
|
||||
{
|
||||
uint32_t ui32NameOffset = *pui16Tokens++ << 16;
|
||||
ui32NameOffset |= *pui16Tokens++;
|
||||
|
||||
psClassInstance->ui16ID = *pui16Tokens++;
|
||||
psClassInstance->ui16ConstBuf = *pui16Tokens++;
|
||||
psClassInstance->ui16ConstBufOffset = *pui16Tokens++;
|
||||
psClassInstance->ui16Texture = *pui16Tokens++;
|
||||
psClassInstance->ui16Sampler = *pui16Tokens++;
|
||||
|
||||
psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset));
|
||||
|
||||
return pui16Tokens;
|
||||
}
|
||||
|
||||
static void ReadInterfaces(const uint32_t* pui32Tokens,
|
||||
ShaderInfo* psShaderInfo)
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t ui32StartSlot;
|
||||
const uint32_t* pui32FirstInterfaceToken = pui32Tokens;
|
||||
const uint32_t ui32ClassInstanceCount = *pui32Tokens++;
|
||||
const uint32_t ui32ClassTypeCount = *pui32Tokens++;
|
||||
const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++;
|
||||
/*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++;
|
||||
const uint32_t ui32ClassInstanceOffset = *pui32Tokens++;
|
||||
const uint32_t ui32ClassTypeOffset = *pui32Tokens++;
|
||||
const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++;
|
||||
|
||||
const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset);
|
||||
const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset);
|
||||
const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset);
|
||||
|
||||
const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots;
|
||||
|
||||
ClassType* psClassTypes;
|
||||
ClassInstance* psClassInstances;
|
||||
|
||||
psShaderInfo->psClassTypes.clear();
|
||||
psShaderInfo->psClassTypes.resize(ui32ClassTypeCount);
|
||||
psClassTypes = &psShaderInfo->psClassTypes[0];
|
||||
|
||||
for (i = 0; i < ui32ClassTypeCount; ++i)
|
||||
{
|
||||
pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes + i);
|
||||
psClassTypes[i].ui16ID = (uint16_t)i;
|
||||
}
|
||||
|
||||
psShaderInfo->psClassInstances.clear();
|
||||
psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount);
|
||||
psClassInstances = &psShaderInfo->psClassInstances[0];
|
||||
|
||||
for (i = 0; i < ui32ClassInstanceCount; ++i)
|
||||
{
|
||||
pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances + i);
|
||||
}
|
||||
|
||||
//Slots map function table to $ThisPointer cbuffer variable index
|
||||
ui32StartSlot = 0;
|
||||
for (i = 0; i < ui32InterfaceSlotRecordCount; ++i)
|
||||
{
|
||||
uint32_t k;
|
||||
|
||||
const uint32_t ui32SlotSpan = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32Count = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++;
|
||||
const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++;
|
||||
|
||||
const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32TypeIDOffset);
|
||||
const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32TableIDOffset);
|
||||
|
||||
for (k = 0; k < ui32Count; ++k)
|
||||
{
|
||||
psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++;
|
||||
}
|
||||
|
||||
ui32StartSlot += ui32SlotSpan;
|
||||
}
|
||||
}
|
||||
|
||||
void LoadShaderInfo(const uint32_t ui32MajorVersion,
|
||||
const uint32_t ui32MinorVersion,
|
||||
const ReflectionChunks* psChunks,
|
||||
ShaderInfo* psInfo,
|
||||
uint32_t decodeFlags)
|
||||
{
|
||||
const uint32_t* pui32Inputs = psChunks->pui32Inputs;
|
||||
const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11;
|
||||
const uint32_t* pui32Resources = psChunks->pui32Resources;
|
||||
const uint32_t* pui32Interfaces = psChunks->pui32Interfaces;
|
||||
const uint32_t* pui32Outputs = psChunks->pui32Outputs;
|
||||
const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11;
|
||||
const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams;
|
||||
const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants;
|
||||
const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11;
|
||||
|
||||
psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
|
||||
psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
|
||||
psInfo->ui32TessInputControlPointCount = 0;
|
||||
psInfo->ui32TessOutputControlPointCount = 0;
|
||||
psInfo->eTessDomain = TESSELLATOR_DOMAIN_UNDEFINED;
|
||||
psInfo->bEarlyFragmentTests = false;
|
||||
|
||||
psInfo->ui32MajorVersion = ui32MajorVersion;
|
||||
psInfo->ui32MinorVersion = ui32MinorVersion;
|
||||
|
||||
|
||||
if (pui32Inputs)
|
||||
ReadInputSignatures(pui32Inputs, psInfo, 0);
|
||||
if (pui32Inputs11)
|
||||
ReadInputSignatures(pui32Inputs11, psInfo, 1);
|
||||
if (pui32Resources)
|
||||
ReadResources(pui32Resources, psInfo, decodeFlags);
|
||||
if (pui32Interfaces)
|
||||
ReadInterfaces(pui32Interfaces, psInfo);
|
||||
if (pui32Outputs)
|
||||
ReadOutputSignatures(pui32Outputs, psInfo, 0, 0);
|
||||
if (pui32Outputs11)
|
||||
ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1);
|
||||
if (pui32OutputsWithStreams)
|
||||
ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1);
|
||||
if (pui32PatchConstants)
|
||||
ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0);
|
||||
if (pui32PatchConstants11)
|
||||
ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1);
|
||||
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < psInfo->psConstantBuffers.size(); ++i)
|
||||
{
|
||||
if (psInfo->psConstantBuffers[i].name == "$ThisPointer")
|
||||
{
|
||||
psInfo->psThisPointerConstBuffer = &psInfo->psConstantBuffers[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
1190
third_party/HLSLcc/src/toGLSL.cpp
vendored
Normal file
1190
third_party/HLSLcc/src/toGLSL.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3933
third_party/HLSLcc/src/toGLSLDeclaration.cpp
vendored
Normal file
3933
third_party/HLSLcc/src/toGLSLDeclaration.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4801
third_party/HLSLcc/src/toGLSLInstruction.cpp
vendored
Normal file
4801
third_party/HLSLcc/src/toGLSLInstruction.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1892
third_party/HLSLcc/src/toGLSLOperand.cpp
vendored
Normal file
1892
third_party/HLSLcc/src/toGLSLOperand.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
988
third_party/HLSLcc/src/toMetal.cpp
vendored
Normal file
988
third_party/HLSLcc/src/toMetal.cpp
vendored
Normal file
@ -0,0 +1,988 @@
|
||||
#include "internal_includes/toMetal.h"
|
||||
#include "internal_includes/HLSLCrossCompilerContext.h"
|
||||
#include "internal_includes/Shader.h"
|
||||
#include "internal_includes/debug.h"
|
||||
|
||||
#include "internal_includes/Declaration.h"
|
||||
#include "internal_includes/toGLSL.h"
|
||||
#include "internal_includes/LoopTransform.h"
|
||||
#include "internal_includes/HLSLccToolkit.h"
|
||||
#include <algorithm>
|
||||
|
||||
// Emits the declaration of struct `sname` into `glsl`, after first emitting
// (recursively) every struct listed in its m_Dependencies. Each struct is
// printed at most once: m_IsPrinted is set before recursing, so a struct that
// is reached again returns immediately.
static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs)
{
    StructDefinition &def = defs[sname];
    if (def.m_IsPrinted)
        return;
    def.m_IsPrinted = true;

    for (std::string &depName : def.m_Dependencies)
        PrintStructDeclaration(psContext, glsl, depName, defs);

    bformata(glsl, "struct %s\n{\n", sname.c_str());
    psContext->indent++;
    for (const MemberDefinitions::value_type &mem : def.m_Members)
    {
        // One member per line: indentation, the stored declaration text, then ';'.
        psContext->AddIndentation();
        bcatcstr(glsl, mem.second.c_str());
        bcatcstr(glsl, ";\n");
    }
    psContext->indent--;

    bcatcstr(glsl, "};\n\n");
}
|
||||
|
||||
// Prints every struct that the definition `name` depends on (and, through
// PrintStructDeclaration, their own dependencies) into the current output
// string. The definition `name` itself is not printed here.
void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name)
{
    bstring glsl = *psContext->currentGLSLString;
    StructDefinition &root = defs[name];
    for (std::string &depName : root.m_Dependencies)
        PrintStructDeclaration(psContext, glsl, depName, defs);
}
|
||||
|
||||
// Maps a shader phase to its function name. MAIN_PHASE and any phase value not
// listed here yield the empty string.
static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
{
    switch (eType)
    {
        case HS_GLOBAL_DECL_PHASE:
            return "hs_global_decls";
        case HS_FORK_PHASE:
            return "fork_phase";
        case HS_CTRL_POINT_PHASE:
            return "control_point_phase";
        case HS_JOIN_PHASE:
            return "join_phase";
        case MAIN_PHASE:
        default:
            return "";
    }
}
|
||||
|
||||
// Emits one assignment per input signature that copies the input of control
// point `controlPointID` to the same-named output. The position input
// (system value NAME_POSITION, or semantic "POS", with semantic index 0) is
// written as mtl_Position; every other input uses its semantic name followed
// by its semantic index.
static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
{
    bstring glsl = *psContext->currentGLSLString;

    for (uint32_t sigIdx = 0; sigIdx < psContext->psShader->sInfo.psInputSignatures.size(); sigIdx++)
    {
        const ShaderInfo::InOutSignature &sig = psContext->psShader->sInfo.psInputSignatures[sigIdx];
        const bool isPosition = (sig.eSystemValueType == NAME_POSITION || sig.semanticName == "POS") && sig.ui32SemanticIndex == 0;

        psContext->AddIndentation();
        if (isPosition)
        {
            bformata(glsl, "%s%s = %scp[controlPointID].%s;\n", psContext->outputPrefix, "mtl_Position", psContext->inputPrefix, "mtl_Position");
        }
        else
        {
            bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, sig.semanticName.c_str(), sig.ui32SemanticIndex, psContext->inputPrefix, sig.semanticName.c_str(), sig.ui32SemanticIndex);
        }
    }
}
|
||||
|
||||
bool ToMetal::Translate()
|
||||
{
|
||||
bstring glsl;
|
||||
uint32_t i;
|
||||
Shader* psShader = psContext->psShader;
|
||||
uint32_t ui32Phase;
|
||||
|
||||
psContext->psTranslator = this;
|
||||
|
||||
SetIOPrefixes();
|
||||
psShader->ExpandSWAPCs();
|
||||
psShader->ForcePositionToHighp();
|
||||
psShader->AnalyzeIOOverlap();
|
||||
if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0)
|
||||
psShader->SetMaxSemanticIndex();
|
||||
psShader->FindUnusedGlobals(psContext->flags);
|
||||
|
||||
psContext->indent = 0;
|
||||
|
||||
glsl = bfromcstralloc(1024 * 10, "");
|
||||
bstring bodyglsl = bfromcstralloc(1024 * 10, "");
|
||||
|
||||
psContext->glsl = glsl;
|
||||
for (i = 0; i < psShader->asPhases.size(); ++i)
|
||||
{
|
||||
psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, "");
|
||||
psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, "");
|
||||
}
|
||||
|
||||
psContext->currentGLSLString = &glsl;
|
||||
psShader->eTargetLanguage = LANG_METAL;
|
||||
psShader->extensions = NULL;
|
||||
psContext->currentPhase = MAIN_PHASE;
|
||||
|
||||
psContext->ClearDependencyData();
|
||||
|
||||
const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE };
|
||||
uint32_t ui32PhaseCallIndex;
|
||||
int hasControlPointPhase = 0;
|
||||
|
||||
const int maxThreadsPerThreadGroup = 32;
|
||||
int numPatchesInThreadGroup = 0;
|
||||
bool hasControlPoint = false;
|
||||
bool hasPatchConstant = false;
|
||||
std::string tessVertexFunctionArguments;
|
||||
|
||||
if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
|
||||
{
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers;
|
||||
m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots;
|
||||
m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots;
|
||||
m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots;
|
||||
hasControlPoint = psContext->psDependencies->hasControlPoint;
|
||||
hasPatchConstant = psContext->psDependencies->hasPatchConstant;
|
||||
}
|
||||
}
|
||||
|
||||
ClampPartialPrecisions();
|
||||
|
||||
for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
ShaderPhase &phase = psShader->asPhases[ui32Phase];
|
||||
phase.UnvectorizeImmMoves();
|
||||
psContext->DoDataTypeAnalysis(&phase);
|
||||
phase.ResolveUAVProperties(psShader->sInfo);
|
||||
ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan
|
||||
HLSLcc::DoLoopTransform(psContext, phase);
|
||||
}
|
||||
|
||||
psShader->PruneTempRegisters();
|
||||
|
||||
//Special case. Can have multiple phases.
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
psShader->ConsolidateHullTempVars();
|
||||
|
||||
// Find out if we have a passthrough hull shader
|
||||
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
|
||||
hasControlPointPhase = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Hull and Domain shaders get merged into vertex shader output
|
||||
if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER))
|
||||
{
|
||||
if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH)
|
||||
bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n");
|
||||
bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
|
||||
bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n");
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
psContext->indent++;
|
||||
|
||||
// Phase 1 is always the global decls phase, no instructions
|
||||
for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
|
||||
}
|
||||
|
||||
if (hasControlPointPhase == 0)
|
||||
{
|
||||
DeclareHullShaderPassthrough();
|
||||
}
|
||||
|
||||
for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++)
|
||||
{
|
||||
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
|
||||
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
|
||||
continue;
|
||||
psContext->currentPhase = ui32Phase;
|
||||
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
// bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
|
||||
}
|
||||
for (i = 0; i < psPhase->psDecl.size(); ++i)
|
||||
{
|
||||
TranslateDeclaration(&psPhase->psDecl[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
psContext->indent++;
|
||||
|
||||
for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
|
||||
TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
// Output default implementations for framebuffer index remap if needed
|
||||
if (m_NeedFBOutputRemapDecl)
|
||||
bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n");
|
||||
if (m_NeedFBInputRemapDecl)
|
||||
bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n");
|
||||
|
||||
DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size());
|
||||
GenerateTexturesReflection(&psContext->m_Reflection);
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
psContext->currentPhase = MAIN_PHASE;
|
||||
|
||||
if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0)
|
||||
{
|
||||
hasControlPoint = true;
|
||||
|
||||
m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint");
|
||||
m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn");
|
||||
PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint");
|
||||
PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn");
|
||||
}
|
||||
|
||||
if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0)
|
||||
{
|
||||
hasPatchConstant = true;
|
||||
|
||||
m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant");
|
||||
m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn");
|
||||
PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant");
|
||||
PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn");
|
||||
}
|
||||
|
||||
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches"));
|
||||
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch"));
|
||||
|
||||
if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0)
|
||||
{
|
||||
m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo");
|
||||
PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo");
|
||||
}
|
||||
|
||||
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
|
||||
{
|
||||
m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName());
|
||||
if (psContext->psDependencies)
|
||||
psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName());
|
||||
|
||||
// Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents
|
||||
m_StructDefinitions[GetInputStructName()].m_Members.clear();
|
||||
|
||||
bstring vertexOut = bfromcstr("");
|
||||
bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount);
|
||||
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *)vertexOut->data));
|
||||
bdestroy(vertexOut);
|
||||
}
|
||||
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
for (auto i = psContext->psDependencies->m_SharedFunctionMembers.begin(), in = psContext->psDependencies->m_SharedFunctionMembers.end(); i != in;)
|
||||
{
|
||||
tessVertexFunctionArguments += i->first.c_str();
|
||||
++i;
|
||||
|
||||
// we want to avoid trailing comma
|
||||
if (i != in)
|
||||
tessVertexFunctionArguments += ", ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == DOMAIN_SHADER)
|
||||
{
|
||||
// For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader
|
||||
if (hasControlPoint)
|
||||
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point<Mtl_ControlPointIn> cp"));
|
||||
if (hasPatchConstant)
|
||||
m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch"));
|
||||
}
|
||||
|
||||
if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
|
||||
{
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members;
|
||||
psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots;
|
||||
psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount();
|
||||
psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots;
|
||||
psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount();
|
||||
psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots;
|
||||
psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount();
|
||||
}
|
||||
}
|
||||
|
||||
if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
|
||||
{
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
// if we go for fully procedural geometry we might end up without Mtl_VertexIn
|
||||
for (std::vector<std::string>::const_iterator itr = psContext->psDependencies->m_SharedDependencies.begin(); itr != psContext->psDependencies->m_SharedDependencies.end(); itr++)
|
||||
{
|
||||
if (*itr == "Mtl_VertexIn")
|
||||
{
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]"));
|
||||
if (tessVertexFunctionArguments.length())
|
||||
tessVertexFunctionArguments += ", ";
|
||||
tessVertexFunctionArguments += "vertexInput";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]"));
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]"));
|
||||
|
||||
bstring buffer = bfromcstr("");
|
||||
uint32_t slot = 0;
|
||||
|
||||
if (hasControlPoint)
|
||||
{
|
||||
slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer);
|
||||
bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot);
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *)buffer->data));
|
||||
btrunc(buffer, 0);
|
||||
}
|
||||
|
||||
if (hasPatchConstant)
|
||||
{
|
||||
slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer);
|
||||
bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot);
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *)buffer->data));
|
||||
btrunc(buffer, 0);
|
||||
}
|
||||
|
||||
slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer);
|
||||
bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot);
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *)buffer->data));
|
||||
btrunc(buffer, 0);
|
||||
|
||||
slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer);
|
||||
bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot);
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *)buffer->data));
|
||||
btrunc(buffer, 0);
|
||||
|
||||
bdestroy(buffer);
|
||||
}
|
||||
else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
|
||||
{
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input"));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]"));
|
||||
}
|
||||
|
||||
m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName());
|
||||
if (psContext->psDependencies)
|
||||
psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName());
|
||||
}
|
||||
|
||||
if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
|
||||
{
|
||||
// m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions
|
||||
std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem)
|
||||
{
|
||||
if (mem.first == "mtl_InstanceID")
|
||||
{
|
||||
if (psShader->eShaderType == VERTEX_SHADER)
|
||||
mem.second.assign("uint mtl_InstanceID");
|
||||
else if (psShader->eShaderType == HULL_SHADER)
|
||||
mem.second.assign("// mtl_InstanceID passed through groupID");
|
||||
}
|
||||
else if (mem.first == "mtl_BaseInstance")
|
||||
{
|
||||
if (psShader->eShaderType == VERTEX_SHADER)
|
||||
mem.second.assign("uint mtl_BaseInstance");
|
||||
else if (psShader->eShaderType == HULL_SHADER)
|
||||
mem.second.assign("// mtl_BaseInstance ignored");
|
||||
}
|
||||
else if (mem.first == "mtl_VertexID")
|
||||
{
|
||||
if (psShader->eShaderType == VERTEX_SHADER)
|
||||
mem.second.assign("uint mtl_VertexID");
|
||||
else if (psShader->eShaderType == HULL_SHADER)
|
||||
mem.second.assign("// mtl_VertexID generated in compute kernel");
|
||||
else if (psShader->eShaderType == DOMAIN_SHADER)
|
||||
mem.second.assign("// mtl_VertexID unused");
|
||||
}
|
||||
else if (mem.first == "mtl_BaseVertex")
|
||||
{
|
||||
if (psShader->eShaderType == VERTEX_SHADER)
|
||||
mem.second.assign("uint mtl_BaseVertex");
|
||||
else if (psShader->eShaderType == HULL_SHADER)
|
||||
mem.second.assign("// mtl_BaseVertex generated in compute kernel");
|
||||
else if (psShader->eShaderType == DOMAIN_SHADER)
|
||||
mem.second.assign("// mtl_BaseVertex unused");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (psShader->eShaderType != COMPUTE_SHADER)
|
||||
{
|
||||
if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0)
|
||||
{
|
||||
m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName());
|
||||
if (psContext->psDependencies)
|
||||
psContext->psDependencies->m_SharedDependencies.push_back(GetOutputStructName());
|
||||
}
|
||||
}
|
||||
|
||||
PrintStructDeclarations(m_StructDefinitions);
|
||||
|
||||
psContext->currentGLSLString = &bodyglsl;
|
||||
|
||||
bool popPragmaDiagnostic = false;
|
||||
if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER)
|
||||
{
|
||||
popPragmaDiagnostic = true;
|
||||
|
||||
bcatcstr(bodyglsl, "#pragma clang diagnostic push\n");
|
||||
bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n");
|
||||
}
|
||||
|
||||
switch (psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0)
|
||||
bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n");
|
||||
else
|
||||
bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n");
|
||||
break;
|
||||
case PIXEL_SHADER:
|
||||
if (psShader->sInfo.bEarlyFragmentTests)
|
||||
bcatcstr(bodyglsl, "[[early_fragment_tests]]\n");
|
||||
if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0)
|
||||
bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n");
|
||||
else
|
||||
bcatcstr(bodyglsl, "fragment void xlatMtlMain(\n");
|
||||
break;
|
||||
case COMPUTE_SHADER:
|
||||
bcatcstr(bodyglsl, "kernel void computeMain(\n");
|
||||
break;
|
||||
case HULL_SHADER:
|
||||
bcatcstr(bodyglsl, "kernel void patchKernel(\n");
|
||||
break;
|
||||
case DOMAIN_SHADER:
|
||||
{
|
||||
const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "quad" : "triangle";
|
||||
uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount;
|
||||
bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Not supported
|
||||
ASSERT(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
psContext->indent++;
|
||||
for (auto itr = m_StructDefinitions[""].m_Members.begin();;)
|
||||
{
|
||||
if (itr == m_StructDefinitions[""].m_Members.end())
|
||||
break;
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, itr->second.c_str());
|
||||
|
||||
itr++;
|
||||
if (itr != m_StructDefinitions[""].m_Members.end())
|
||||
bcatcstr(bodyglsl, ",\n");
|
||||
}
|
||||
|
||||
// Figure and declare counters and their binds (we also postponed buffer reflection until now)
|
||||
for (auto it = m_BufferReflections.begin(); it != m_BufferReflections.end(); ++it)
|
||||
{
|
||||
uint32_t bind = it->second.bind;
|
||||
if (it->second.hasCounter)
|
||||
{
|
||||
const uint32_t counterBind = m_BufferSlots.PeekFirstFreeSlot();
|
||||
m_BufferSlots.ReserveBindingSlot(counterBind, BindingSlotAllocator::UAV);
|
||||
|
||||
bformata(bodyglsl, ",\n\t\tdevice atomic_uint* %s_counter [[ buffer(%d) ]]", it->first.c_str(), counterBind);
|
||||
|
||||
// Offset with 1 so we can capture counters that are bound to slot 0 (if, say, user decides to start buffers at register 1 or higher)
|
||||
bind |= ((counterBind + 1) << 16);
|
||||
}
|
||||
psContext->m_Reflection.OnBufferBinding(it->first, bind, it->second.isUAV);
|
||||
}
|
||||
|
||||
bcatcstr(bodyglsl, ")\n{\n");
|
||||
|
||||
if (popPragmaDiagnostic)
|
||||
bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n");
|
||||
|
||||
if (psShader->eShaderType != COMPUTE_SHADER)
|
||||
{
|
||||
if (psShader->eShaderType == VERTEX_SHADER)
|
||||
{
|
||||
// Fix HLSL compatibility with DrawProceduralIndirect, SV_InstanceID always starts at 0 but with Metal, a base instance was not subtracted for equal behavior
|
||||
// Base semantics available everywhere starting with iOS9 (except hardware limitation exists with the original Apple A7/A8 GPUs, causing UNITY_SUPPORT_INDIRECT_BUFFERS=0)
|
||||
std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&](MemberDefinitions::value_type &mem)
|
||||
{
|
||||
if (mem.first == "mtl_InstanceID")
|
||||
{
|
||||
bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "mtl_BaseInstance = 0;\n");
|
||||
bcatcstr(bodyglsl, "#endif\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "mtl_InstanceID = mtl_InstanceID - mtl_BaseInstance;\n");
|
||||
}
|
||||
else if (mem.first == "mtl_VertexID")
|
||||
{
|
||||
bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "mtl_BaseVertex = 0;\n");
|
||||
bcatcstr(bodyglsl, "#endif\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "mtl_VertexID = mtl_VertexID - mtl_BaseVertex;\n");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, GetOutputStructName().c_str());
|
||||
bcatcstr(bodyglsl, " output;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
if (hasPatchConstant)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n");
|
||||
}
|
||||
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint mtl_BaseInstance = 0;\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y - mtl_BaseInstance;\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint mtl_BaseVertex = 0;\n");
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "const uint mtl_VertexID = ((mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x) - mtl_BaseVertex;\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str());
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str());
|
||||
|
||||
psContext->AddIndentation();
|
||||
std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf";
|
||||
bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str());
|
||||
}
|
||||
|
||||
// There are cases when there are no control point phases and we have to do passthrough
|
||||
if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "if (patchValid) {\n");
|
||||
psContext->indent++;
|
||||
|
||||
// Passthrough control point phase, run the rest only once per patch
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str());
|
||||
|
||||
DoHullShaderPassthrough(psContext);
|
||||
|
||||
psContext->indent--;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "}\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "if (!patchValid) {\n");
|
||||
psContext->indent++;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "return;\n");
|
||||
psContext->indent--;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "}\n");
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == HULL_SHADER)
|
||||
{
|
||||
for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++)
|
||||
{
|
||||
for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
|
||||
{
|
||||
uint32_t i;
|
||||
ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
|
||||
if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
|
||||
continue;
|
||||
psContext->currentPhase = ui32Phase;
|
||||
|
||||
if (psPhase->earlyMain->slen > 1)
|
||||
{
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
|
||||
}
|
||||
|
||||
bconcat(bodyglsl, psPhase->earlyMain);
|
||||
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
|
||||
}
|
||||
}
|
||||
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase);
|
||||
if (psPhase->ui32InstanceCount > 1)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", psPhase->ui32InstanceCount);
|
||||
psContext->indent++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "if (patchValid) {\n");
|
||||
psContext->indent++;
|
||||
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "{\n");
|
||||
psContext->indent++;
|
||||
}
|
||||
}
|
||||
|
||||
if (psPhase->psInst.size() > 0)
|
||||
{
|
||||
//The minus one here is remove the return statement at end of phases.
|
||||
//We don't want to translate that, we'll just end the function body.
|
||||
ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET);
|
||||
for (i = 0; i < psPhase->psInst.size() - 1; ++i)
|
||||
{
|
||||
TranslateInstruction(&psPhase->psInst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
psContext->AddIndentation();
|
||||
bformata(bodyglsl, "}\n");
|
||||
|
||||
if (psPhase->hasPostShaderCode)
|
||||
{
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- Post shader code ---\n");
|
||||
}
|
||||
|
||||
bconcat(bodyglsl, psPhase->postShaderCode);
|
||||
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- End post shader code ---\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
|
||||
{
|
||||
// We're done printing control point phase, run the rest only once per patch
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n");
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "if (!patchValid) {\n");
|
||||
psContext->indent++;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "return;\n");
|
||||
psContext->indent--;
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "}\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasControlPoint)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "controlPoints[mtl_VertexID] = output;\n");
|
||||
}
|
||||
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n");
|
||||
|
||||
if (hasPatchConstant)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n");
|
||||
}
|
||||
|
||||
if (psContext->psDependencies)
|
||||
{
|
||||
//Save partitioning and primitive type for use by domain shader.
|
||||
psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim;
|
||||
psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning;
|
||||
psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup;
|
||||
psContext->psDependencies->hasControlPoint = hasControlPoint;
|
||||
psContext->psDependencies->hasPatchConstant = hasPatchConstant;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (psContext->psShader->asPhases[0].earlyMain->slen > 1)
|
||||
{
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- Start Early Main ---\n");
|
||||
}
|
||||
|
||||
bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain);
|
||||
|
||||
if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
|
||||
{
|
||||
psContext->AddIndentation();
|
||||
bcatcstr(bodyglsl, "//--- End Early Main ---\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i)
|
||||
{
|
||||
TranslateInstruction(&psShader->asPhases[0].psInst[i]);
|
||||
}
|
||||
}
|
||||
|
||||
psContext->indent--;
|
||||
|
||||
bcatcstr(bodyglsl, "}\n");
|
||||
|
||||
psContext->currentGLSLString = &glsl;
|
||||
|
||||
if (psShader->eShaderType == HULL_SHADER && psContext->psDependencies)
|
||||
{
|
||||
psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount());
|
||||
}
|
||||
|
||||
if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies)
|
||||
{
|
||||
int mtlTessellationPartitionMode = -1;
|
||||
int mtlWinding = -1;
|
||||
|
||||
switch (psContext->psDependencies->eTessPartitioning)
|
||||
{
|
||||
case TESSELLATOR_PARTITIONING_INTEGER:
|
||||
mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger
|
||||
break;
|
||||
case TESSELLATOR_PARTITIONING_POW2:
|
||||
mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2
|
||||
break;
|
||||
case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
|
||||
mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd
|
||||
break;
|
||||
case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
|
||||
mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven
|
||||
break;
|
||||
case TESSELLATOR_PARTITIONING_UNDEFINED:
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (psContext->psDependencies->eTessOutPrim)
|
||||
{
|
||||
case TESSELLATOR_OUTPUT_TRIANGLE_CW:
|
||||
mtlWinding = 0; // MTLWindingClockwise
|
||||
break;
|
||||
case TESSELLATOR_OUTPUT_TRIANGLE_CCW:
|
||||
mtlWinding = 1; // MTLWindingCounterClockwise
|
||||
break;
|
||||
case TESSELLATOR_OUTPUT_POINT:
|
||||
psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true);
|
||||
break;
|
||||
case TESSELLATOR_OUTPUT_LINE:
|
||||
psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true);
|
||||
break;
|
||||
case TESSELLATOR_OUTPUT_UNDEFINED:
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t)psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup);
|
||||
}
|
||||
|
||||
bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str());
|
||||
|
||||
// Print out extra functions we generated
|
||||
std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p)
|
||||
{
|
||||
bcatcstr(glsl, p.second.c_str());
|
||||
bcatcstr(glsl, "\n");
|
||||
});
|
||||
|
||||
// And then the actual function body
|
||||
bconcat(glsl, bodyglsl);
|
||||
bdestroy(bodyglsl);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body)
|
||||
{
|
||||
if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end())
|
||||
return;
|
||||
m_FunctionDefinitions.insert(std::make_pair(name, body));
|
||||
}
|
||||
|
||||
std::string ToMetal::GetOutputStructName() const
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
return "Mtl_VertexOut";
|
||||
case PIXEL_SHADER:
|
||||
return "Mtl_FragmentOut";
|
||||
case HULL_SHADER:
|
||||
if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE ||
|
||||
psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE)
|
||||
return "Mtl_PatchConstant";
|
||||
return "Mtl_ControlPoint";
|
||||
case DOMAIN_SHADER:
|
||||
return "Mtl_VertexOutPostTess";
|
||||
default:
|
||||
ASSERT(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::string ToMetal::GetInputStructName() const
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
return "Mtl_VertexIn";
|
||||
case PIXEL_SHADER:
|
||||
return "Mtl_FragmentIn";
|
||||
case COMPUTE_SHADER:
|
||||
return "Mtl_KernelIn";
|
||||
case HULL_SHADER:
|
||||
return "Mtl_HullIn";
|
||||
case DOMAIN_SHADER:
|
||||
return "Mtl_VertexInPostTess";
|
||||
default:
|
||||
ASSERT(0);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
std::string ToMetal::GetCBName(const std::string& cbName) const
|
||||
{
|
||||
std::string output = cbName;
|
||||
if (cbName[0] == '$')
|
||||
{
|
||||
// "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB.
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
case HULL_SHADER:
|
||||
case DOMAIN_SHADER:
|
||||
output[0] = 'V';
|
||||
break;
|
||||
case PIXEL_SHADER:
|
||||
output[0] = 'F';
|
||||
break;
|
||||
case COMPUTE_SHADER:
|
||||
output = cbName.substr(1);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
void ToMetal::SetIOPrefixes()
|
||||
{
|
||||
switch (psContext->psShader->eShaderType)
|
||||
{
|
||||
case VERTEX_SHADER:
|
||||
case HULL_SHADER:
|
||||
case DOMAIN_SHADER:
|
||||
psContext->inputPrefix = "input.";
|
||||
psContext->outputPrefix = "output.";
|
||||
break;
|
||||
|
||||
case PIXEL_SHADER:
|
||||
psContext->inputPrefix = "input.";
|
||||
psContext->outputPrefix = "output.";
|
||||
break;
|
||||
|
||||
case COMPUTE_SHADER:
|
||||
psContext->inputPrefix = "";
|
||||
psContext->outputPrefix = "";
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void ToMetal::ClampPartialPrecisions()
|
||||
{
|
||||
HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL,
|
||||
[](std::vector<Instruction>::iterator &i, Operand *o, uint32_t flags)
|
||||
{
|
||||
if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8)
|
||||
o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16;
|
||||
});
|
||||
}
|
||||
|
||||
// Walks the declarations of `phase` and reserves a binding slot for each
// unordered access view declaration, keyed by the register number of the
// declaration's first operand:
//   raw / structured UAV             -> buffer slot (RWBuffer)
//   typed UAV with buffer dimension  -> buffer slot (RWBuffer)
//   typed UAV with any other dimension -> texture slot (UAV)
// Declarations with any other opcode are ignored.
void ToMetal::ReserveUAVBindingSlots(ShaderPhase *phase)
{
    for (uint32_t declIdx = 0; declIdx < phase->psDecl.size(); ++declIdx)
    {
        const auto &decl = phase->psDecl[declIdx];
        const uint32_t regNo = decl.asOperands[0].ui32RegisterNumber;

        switch (decl.eOpcode)
        {
            case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
            case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
                m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer);
                break;

            case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
                // Typed buffers are atm faked using structured buffers -> bind in buffer space
                if (decl.value.eResourceDimension == RESOURCE_DIMENSION_BUFFER)
                    m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer);
                else
                    m_TextureSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::UAV);
                break;

            default:
                break;
        }
    }
}
|
2454
third_party/HLSLcc/src/toMetalDeclaration.cpp
vendored
Normal file
2454
third_party/HLSLcc/src/toMetalDeclaration.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4096
third_party/HLSLcc/src/toMetalInstruction.cpp
vendored
Normal file
4096
third_party/HLSLcc/src/toMetalInstruction.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1277
third_party/HLSLcc/src/toMetalOperand.cpp
vendored
Normal file
1277
third_party/HLSLcc/src/toMetalOperand.cpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user